diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index c8bdd0a45cb..548e5e931b4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -55,6 +55,7 @@ import org.apache.sysds.runtime.compress.lib.CLALibReExpand; import org.apache.sysds.runtime.compress.lib.CLALibRightMultBy; import org.apache.sysds.runtime.compress.lib.CLALibScalar; +import org.apache.sysds.runtime.compress.lib.CLALibSlice; import org.apache.sysds.runtime.compress.lib.CLALibSquash; import org.apache.sysds.runtime.compress.lib.CLALibUnary; import org.apache.sysds.runtime.controlprogram.caching.CacheBlock; @@ -691,61 +692,14 @@ public void setOverlapping(boolean overlapping) { @Override public MatrixBlock slice(int rl, int ru, int cl, int cu, boolean deep, CacheBlock ret) { validateSliceArgument(rl, ru, cl, cu); - MatrixBlock tmp; - if(rl == ru && cl == cu) { - // get a single index, and return in a matrixBlock - tmp = new MatrixBlock(1, 1, 0); - tmp.appendValue(0, 0, getValue(rl, cl)); - return tmp; - } - else if(rl == 0 && ru == getNumRows() - 1) { - tmp = sliceColumns(cl, cu); - tmp.recomputeNonZeros(); - return tmp; - } - else if(cl == 0 && cu == getNumColumns() - 1) { - // Row Slice. Potential optimization if the slice contains enough rows. - // +1 since the implementation arguments for slice is inclusive values for ru - // and cu. It is not inclusive in decompression, and construction of MatrixBlock. - tmp = new MatrixBlock(ru + 1 - rl, getNumColumns(), false).allocateDenseBlock(); - for(AColGroup g : getColGroups()) - g.decompressToBlock(tmp, rl, ru + 1, -rl, 0); - tmp.recomputeNonZeros(); - tmp.examSparsity(); - return tmp; - } - else { - // In the case where an internal matrix is sliced out, then first slice out the - // columns to an compressed intermediate. - tmp = sliceColumns(cl, cu); - // Then call slice recursively, to do the row slice. - // Since we do not copy the index structure but simply maintain a pointer to the - // original this is fine. - tmp = tmp.slice(rl, ru, 0, tmp.getNumColumns() - 1, ret); - return tmp; - } - } - - private CompressedMatrixBlock sliceColumns(int cl, int cu) { - CompressedMatrixBlock ret = new CompressedMatrixBlock(this.getNumRows(), cu + 1 - cl); - List newColGroups = new ArrayList<>(); - for(AColGroup grp : getColGroups()) { - AColGroup slice = grp.sliceColumns(cl, cu + 1); - if(slice != null) - newColGroups.add(slice); - } - ret.allocateColGroupList(newColGroups); - ret.recomputeNonZeros(); - ret.overlappingColGroups = this.isOverlapping(); - return ret; + return CLALibSlice.slice(this, rl, ru, cl, cu, deep); } @Override public void slice(ArrayList outlist, IndexRange range, int rowCut, int colCut, int blen, int boundaryRlen, int boundaryClen) { - printDecompressWarning( + MatrixBlock tmp = getUncompressed( "slice for distribution to spark. 
(Could be implemented such that it does not decompress)");
-		MatrixBlock tmp = getUncompressed();
 		tmp.slice(outlist, range, rowCut, colCut, blen, boundaryRlen, boundaryClen);
 	}

diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
index 42ea6a711e9..97f6f0975d1 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
@@ -250,10 +250,6 @@ else if(mb.isEmpty()) {
 		if(res == null)
 			return abortCompression();

-		if(compSettings.isInSparkInstruction) {
-			// clear soft reference to uncompressed block in case of spark.
-			res.clearSoftReferenceToDecompressed();
-		}
 		return new ImmutablePair<>(res, _stats);
 	}

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
index 27a29cb945d..d46611aa96a 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
@@ -48,7 +48,7 @@ public abstract class AColGroup implements Serializable {
 	/** Public super types of compression ColGroups supported */
 	public enum CompressionType {
-		UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC
+		UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, PFOR,
 	}

 	/**
@@ -57,7 +57,7 @@ public enum CompressionType {
 	 * Protected such that outside the ColGroup package it should be unknown which specific subtype is used.
 	 */
 	protected enum ColGroupType {
-		UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros;
+		UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, PFOR;
 	}

 	/** The ColGroup Indexes contained in the ColGroup */
@@ -132,14 +132,27 @@ public long estimateInMemorySize() {
 	}

 	/**
-	 * Decompress the contents of the column group into the target matrix,.
+	 * Decompress a range of rows into a sparse block.
 	 *
-	 * @param target A matrix block where the columns covered by this column group have not yet been filled in.
-	 * @param rl     Row to start decompression from
-	 * @param ru     Row to end decompression at (not inclusive)
+	 * Note that this uses append, so the sparse column indexes need to be sorted afterwards.
+	 *
+	 * @param sb Sparse target block
+	 * @param rl Row to start at
+	 * @param ru Row to end at
+	 */
+	public final void decompressToSparseBlock(SparseBlock sb, int rl, int ru) {
+		decompressToSparseBlock(sb, rl, ru, 0, 0);
+	}
+
+	/**
+	 * Decompress a range of rows into a dense block.
+	 *
+	 * @param db Dense target block
+	 * @param rl Row to start at
+	 * @param ru Row to end at
 	 */
-	public final void decompressToBlock(MatrixBlock target, int rl, int ru) {
-		decompressToBlock(target, rl, ru, 0, 0);
+	public final void decompressToDenseBlock(DenseBlock db, int rl, int ru) {
+		decompressToDenseBlock(db, rl, ru, 0, 0);
 	}

 	/**
@@ -326,33 +339,29 @@ public double get(int r, int c) {
 	protected abstract ColGroupType getColGroupType();

 	/**
-	 * Decompress the contents of the column group without counting non zeros
+	 * Decompress into the DenseBlock. (no NNZ handling)
 	 *
-	 * The offsets helps us decompress into specific target areas of the output matrix.
-	 *
-	 * If OffR and OffC is 0, then decompression output starts at row offset equal to rl,
+	 * @param db   Target DenseBlock
+	 * @param rl   Row to start decompression from
+	 * @param ru   Row to end decompression at
+	 * @param offR Row offset into the target to decompress
+	 * @param offC Column offset into the target to decompress
+	 */
+	public abstract void decompressToDenseBlock(DenseBlock db, int rl, int ru, int offR, int offC);
+
+	/**
+	 * Decompress into the SparseBlock. (no NNZ handling)
 	 *
-	 * If for instance a MiniBatch of rows 10 to 15, then target would be 5 rows high and arguments would look like:
-	 *
-	 * cg.decompressToBlock(target, 10, 15, -10, 0)
+	 * Note this method allows calls to append, since it is assumed that the sparse column indexes are sorted
+	 * afterwards.
 	 *
-	 * @param target a matrix block where the columns covered by this column group have not yet been filled in.
-	 * @param rl     Row to start decompression at.
-	 * @param ru     Row to end decompression at (not inclusive).
-	 * @param offR   RowOffset into target to assign from.
-	 * @param offC   ColumnOffset into the target matrix to assign from.
+	 * @param sb   Target SparseBlock
+	 * @param rl   Row to start decompression from
+	 * @param ru   Row to end decompression at
+	 * @param offR Row offset into the target to decompress
+	 * @param offC Column offset into the target to decompress
 	 */
-	public final void decompressToBlock(MatrixBlock target, int rl, int ru, int offR, int offC){
-		if(target.isInSparseFormat())
-			decompressToSparseBlock(target.getSparseBlock(), rl, ru, offR, offC);
-		else
-			decompressToDenseBlock(target.getDenseBlock(), rl, ru, offR, offC);
-	}
-
-
-	protected abstract void decompressToDenseBlock(DenseBlock db, int rl, int ru,int offR, int offC);
-
-	protected abstract void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC);
+	public abstract void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC);

 	/**
 	 * Right matrix multiplication with this column group.
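To make the new offR/offC semantics concrete, here is a sketch of a caller that decompresses a row range into a smaller target block, mirroring the row-slice path removed from CompressedMatrixBlock.slice above. The helper name decompressRange is hypothetical; the calls themselves follow the signatures introduced in this diff.

```java
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.colgroup.AColGroup;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;

public class DecompressRangeExample {
	/** Decompress rows [rl, ru) of cmb into a fresh (ru - rl) x nCol dense block. */
	public static MatrixBlock decompressRange(CompressedMatrixBlock cmb, int rl, int ru) {
		MatrixBlock target = new MatrixBlock(ru - rl, cmb.getNumColumns(), false).allocateDenseBlock();
		// offR = -rl maps compressed row rl to target row 0, e.g. rows 10..15 decompress
		// with offR = -10 into a 5-row target; offC = 0 keeps the original column positions.
		for(AColGroup g : cmb.getColGroups())
			g.decompressToDenseBlock(target.getDenseBlock(), rl, ru, -rl, 0);
		target.recomputeNonZeros();
		return target;
	}
}
```

Splitting the old decompressToBlock into the dense and sparse variants moves the format branch out of every column group and, for the sparse path, makes the append-then-sort contract explicit at the interface.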
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java index 106a2df0677..90cd5c94e9a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java @@ -55,11 +55,15 @@ protected AColGroupCompressed(int[] colIndices) { protected abstract void computeColMxx(double[] c, Builtin builtin); - protected abstract void computeSum(double[] c, int nRows, boolean square); + protected abstract void computeSum(double[] c, int nRows); - protected abstract void computeRowSums(double[] c, boolean square, int rl, int ru); + protected abstract void computeRowSums(double[] c, int rl, int ru); - protected abstract void computeColSums(double[] c, int nRows, boolean square); + protected abstract void computeSumSq(double[] c, int nRows); + + protected abstract void computeRowSumsSq(double[] c, int rl, int ru); + + protected abstract void computeColSumsSq(double[] c, int nRows); protected abstract void computeRowMxx(double[] c, Builtin builtin, int rl, int ru); @@ -79,22 +83,27 @@ public double getMax() { return computeMxx(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); } - @Override - public void computeColSums(double[] c, int nRows) { - computeColSums(c, nRows, false); - } - @Override public final void unaryAggregateOperations(AggregateUnaryOperator op, double[] c, int nRows, int rl, int ru) { final ValueFunction fn = op.aggOp.increOp.fn; if(fn instanceof Plus || fn instanceof KahanPlus || fn instanceof KahanPlusSq) { boolean square = fn instanceof KahanPlusSq; - if(op.indexFn instanceof ReduceAll) - computeSum(c, nRows, square); - else if(op.indexFn instanceof ReduceCol) - computeRowSums(c, square, rl, ru); - else if(op.indexFn instanceof ReduceRow) - computeColSums(c, nRows, square); + if(square){ + if(op.indexFn instanceof ReduceAll) + computeSumSq(c, nRows); + else if(op.indexFn instanceof ReduceCol) + computeRowSumsSq(c, rl, ru); + else if(op.indexFn instanceof ReduceRow) + computeColSumsSq(c, nRows); + } + else{ + if(op.indexFn instanceof ReduceAll) + computeSum(c, nRows); + else if(op.indexFn instanceof ReduceCol) + computeRowSums(c, rl, ru); + else if(op.indexFn instanceof ReduceRow) + computeColSums(c, nRows); + } } else if(fn instanceof Multiply) { if(op.indexFn instanceof ReduceAll) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java index 067fa6f20f9..34abf61b05d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java @@ -38,7 +38,6 @@ import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.matrix.data.LibMatrixMult; import org.apache.sysds.runtime.matrix.data.MatrixBlock; -import org.apache.sysds.runtime.matrix.operators.ScalarOperator; /** * Base class for column groups encoded with value dictionary. This include column groups such as DDC OLE and RLE. 
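With the boolean square flag removed from the aggregate interface, each specialization is now its own method. Below is a minimal, dependency-free sketch (hypothetical names, plain arrays standing in for ADictionary and the row mapping) of the pattern computeRowSums and computeRowSumsSq instantiate for dictionary-encoded groups: precompute one aggregate per distinct tuple, then do a single lookup per row.

```java
public class RowAggSketch {
	/** dictVals holds nVals x nCol tuples row-major; rowMap maps each row to its tuple. */
	static void rowSums(double[] c, double[] dictVals, int nCol, int[] rowMap, int rl, int ru) {
		final int nVals = dictVals.length / nCol;
		final double[] tupleSum = new double[nVals]; // analogue of sumAllRowsToDouble
		for(int k = 0; k < nVals; k++)
			for(int j = 0; j < nCol; j++)
				tupleSum[k] += dictVals[k * nCol + j];
		for(int r = rl; r < ru; r++)
			c[r] += tupleSum[rowMap[r]]; // one lookup per row
	}

	static void rowSumsSq(double[] c, double[] dictVals, int nCol, int[] rowMap, int rl, int ru) {
		final int nVals = dictVals.length / nCol;
		final double[] tupleSumSq = new double[nVals]; // analogue of sumAllRowsToDoubleSq
		for(int k = 0; k < nVals; k++)
			for(int j = 0; j < nCol; j++) {
				final double v = dictVals[k * nCol + j];
				tupleSumSq[k] += v * v;
			}
		for(int r = rl; r < ru; r++)
			c[r] += tupleSumSq[rowMap[r]];
	}
}
```

Separating the variants removes a branch from the inner dispatch and lets each column group override only the aggregates it can actually accelerate.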
@@ -171,7 +170,7 @@ protected abstract void decompressToSparseBlockDenseDictionary(SparseBlock ret, double[] values); @Override - public final int getNumValues() { + public int getNumValues() { return _dict.getNumberOfValues(_colIndexes.length); } @@ -286,15 +285,14 @@ private double[] rightMMPreAggSparse(int numVals, SparseBlock b, int[] aggregate } @Override - protected final double computeMxx(double c, Builtin builtin) { + protected double computeMxx(double c, Builtin builtin) { if(_zeros) c = builtin.execute(c, 0); return _dict.aggregate(c, builtin); - } @Override - protected final void computeColMxx(double[] c, Builtin builtin) { + protected void computeColMxx(double[] c, Builtin builtin) { if(_zeros) for(int x = 0; x < _colIndexes.length; x++) c[_colIndexes[x]] = builtin.execute(c[_colIndexes[x]], 0); @@ -302,40 +300,6 @@ protected final void computeColMxx(double[] c, Builtin builtin) { _dict.aggregateCols(c, builtin, _colIndexes); } - /** - * Method for use by subclasses. Applies a scalar operation to the value metadata stored in the dictionary. - * - * @param op scalar operation to perform - * @return transformed copy of value metadata for this column group - */ - protected final ADictionary applyScalarOp(ScalarOperator op) { - return _dict.clone().inplaceScalarOp(op); - } - - /** - * Method for use by subclasses. Applies a scalar operation to the value metadata stored in the dictionary. This - * specific method is used in cases where an new entry is to be added in the dictionary. - * - * Method should only be called if the newVal is not 0! Also the newVal should already have the operator applied. - * - * @param op The Operator to apply to the underlying data. - * @param newVal The new Value to append to the underlying data. - * @param numCols The number of columns in the ColGroup, to specify how many copies of the newVal should be appended. - * @return The new Dictionary containing the values. 
- */ - protected final ADictionary applyScalarOp(ScalarOperator op, double newVal, int numCols) { - return _dict.applyScalarOp(op, newVal, numCols); - } - - protected static double[] allocDVector(int len, boolean reset) { - return new double[len]; - } - - protected static int[] allocIVector(int len, boolean reset) { - LOG.error("deprecated allocIVector"); - return new int[len + 1]; - } - @Override public void readFields(DataInput in) throws IOException { super.readFields(in); @@ -362,16 +326,23 @@ public long getExactSizeOnDisk() { public abstract int[] getCounts(int[] out); @Override - protected final void computeSum(double[] c, int nRows, boolean square) { - if(square) - c[0] += _dict.sumsq(getCounts(), _colIndexes.length); - else - c[0] += _dict.sum(getCounts(), _colIndexes.length); + protected void computeSum(double[] c, int nRows) { + c[0] += _dict.sum(getCounts(), _colIndexes.length); + } + + @Override + public void computeColSums(double[] c, int nRows) { + _dict.colSum(c, getCounts(), _colIndexes); + } + + @Override + protected void computeSumSq(double[] c, int nRows) { + c[0] += _dict.sumSq(getCounts(), _colIndexes.length); } @Override - protected final void computeColSums(double[] c, int nRows, boolean square) { - _dict.colSum(c, getCounts(), _colIndexes, square); + protected void computeColSumsSq(double[] c, int nRows) { + _dict.colSumSq(c, getCounts(), _colIndexes); } @Override @@ -425,7 +396,7 @@ public AColGroupValue copy() { } @Override - protected final AColGroup sliceSingleColumn(int idx) { + protected AColGroup sliceSingleColumn(int idx) { final AColGroupValue ret = (AColGroupValue) copy(); ret._colIndexes = new int[] {0}; if(_colIndexes.length == 1) @@ -437,7 +408,7 @@ protected final AColGroup sliceSingleColumn(int idx) { } @Override - protected final AColGroup sliceMultiColumns(int idStart, int idEnd, int[] outputCols) { + protected AColGroup sliceMultiColumns(int idStart, int idEnd, int[] outputCols) { final AColGroupValue ret = (AColGroupValue) copy(); ret._dict = ret._dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.length); ret._colIndexes = outputCols; @@ -445,20 +416,20 @@ protected final AColGroup sliceMultiColumns(int idStart, int idEnd, int[] output } @Override - protected final void tsmm(double[] result, int numColumns, int nRows) { + protected void tsmm(double[] result, int numColumns, int nRows) { final int[] counts = getCounts(); tsmm(result, numColumns, counts, _dict, _colIndexes); } @Override - public final boolean containsValue(double pattern) { + public boolean containsValue(double pattern) { if(pattern == 0 && _zeros) return true; return _dict.containsValue(pattern); } @Override - public final long getNumberNonZeros(int nRows) { + public long getNumberNonZeros(int nRows) { int[] counts = getCounts(); return _dict.getNumberNonZeros(counts, _colIndexes.length); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java new file mode 100644 index 00000000000..26c055de9d7 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import org.apache.sysds.runtime.compress.DMLCompressionException;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Abstract class for column group types that, for performance reasons, do not perform matrix multiplication or
+ * decompression themselves, but instead morph into another column group type that carries out the operation.
+ */
+public abstract class AMorphingMMColGroup extends AColGroupValue {
+
+	/**
+	 * Constructor for serialization
+	 *
+	 * @param numRows Number of rows contained
+	 */
+	protected AMorphingMMColGroup(int numRows) {
+		super(numRows);
+	}
+
+	/**
+	 * Constructor for a morphing column group that stores its distinct tuples in an ADictionary.
+	 *
+	 * @param colIndices   The Column indexes
+	 * @param numRows      The number of rows contained in this group
+	 * @param dict         The dictionary to contain the distinct tuples
+	 * @param cachedCounts The cached counts of the distinct tuples (can be null since it should be possible to
+	 *                     reconstruct the counts on demand)
+	 */
+	protected AMorphingMMColGroup(int[] colIndices, int numRows, ADictionary dict, int[] cachedCounts) {
+		super(colIndices, numRows, dict, cachedCounts);
+	}
+
+	@Override
+	protected final void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC,
+		SparseBlock sb) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	protected final void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC,
+		double[] values) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	protected final void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC,
+		SparseBlock sb) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	protected final void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC,
+		double[] values) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	public final void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	public final void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	public final void tsmmAColGroup(AColGroup other, MatrixBlock result) {
+		throw new DMLCompressionException("This method should never be called");
+	}
+
+	@Override
+	protected final void tsmm(double[] result, int numColumns, int nRows) {
+		throw new DMLCompressionException("This method 
should never be called"); + } + + public abstract AColGroup extractCommon(double[] constV); +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java index 2a15a2110bb..9d1b1e3712a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java @@ -297,7 +297,7 @@ private boolean shouldPreAggregateLeft(APreAgg lhs) { private static MatrixBlock allocatePreAggregate(MatrixBlock m, int numVals, int rl, int ru) { final int lhsRows = ru - rl; - final double[] vals = allocDVector(lhsRows * numVals, true); + final double[] vals = new double[lhsRows * numVals]; final DenseBlock retB = new DenseBlockFP64(new int[] {lhsRows, numVals}, vals); return new MatrixBlock(lhsRows, numVals, retB); } @@ -318,16 +318,12 @@ private static void tsmmDictionaryWithScaling(final ADictionary dict, final int[ if(mb.isEmpty()) return; else if(mb.isInSparseFormat()) - throw new NotImplementedException(); - else { - final double[] values = mb.getDenseBlockValues(); - MMDictsDenseDenseWithScaling(values, values, rows, cols, counts, ret); - } - } - else { - final double[] values = dict.getValues(); - MMDictsDenseDenseWithScaling(values, values, rows, cols, counts, ret); + TSMMDictsSparseWithScaling(mb.getSparseBlock(), rows, cols, counts, ret); + else + TSMMDictsDenseWithScaling(mb.getDenseBlockValues(), rows, cols, counts, ret); } + else + TSMMDictsDenseWithScaling(dict.getValues(), rows, cols, counts, ret); } /** @@ -416,9 +412,9 @@ private static void MMDictsDenseDense(double[] left, double[] right, int[] rowsL } } - private static void MMDictsDenseDenseWithScaling(double[] left, double[] right, int[] rowsLeft, int[] colsRight, - int[] scaling, MatrixBlock result) { - final int commonDim = Math.min(left.length / rowsLeft.length, right.length / colsRight.length); + private static void TSMMDictsDenseWithScaling(double[] dv, int[] rowsLeft, int[] colsRight, int[] scaling, + MatrixBlock result) { + final int commonDim = Math.min(dv.length / rowsLeft.length, dv.length / colsRight.length); final int resCols = result.getNumColumns(); final double[] resV = result.getDenseBlockValues(); for(int k = 0; k < commonDim; k++) { @@ -427,10 +423,34 @@ private static void MMDictsDenseDenseWithScaling(double[] left, double[] right, final int scale = scaling[k]; for(int i = 0; i < rowsLeft.length; i++) { final int offOut = rowsLeft[i] * resCols; - final double vl = left[offL + i] * scale; + final double vl = dv[offL + i] * scale; if(vl != 0) for(int j = 0; j < colsRight.length; j++) - resV[offOut + colsRight[j]] += vl * right[offR + j]; + resV[offOut + colsRight[j]] += vl * dv[offR + j]; + } + } + } + + private static void TSMMDictsSparseWithScaling(SparseBlock sb, int[] rowsLeft, int[] colsRight, int[] scaling, + MatrixBlock result) { + + final int commonDim = sb.numRows(); + final int resCols = result.getNumColumns(); + final double[] resV = result.getDenseBlockValues(); + + for(int k = 0; k < commonDim; k++) { + if(sb.isEmpty(k)) + continue; + final int apos = sb.pos(k); + final int alen = sb.size(k) + apos; + final int[] aix = sb.indexes(k); + final double[] avals = sb.values(k); + final int scale = scaling[k]; + for(int i = apos; i < alen; i++) { + final double v = avals[i] * scale; + final int offOut = rowsLeft[aix[i]] * resCols; + for(int j = 0; j < alen; j++) + resV[offOut + colsRight[aix[j]]] += v * avals[j]; } } } diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
index 86335b983fa..afe43da66a8 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
@@ -54,21 +54,29 @@ protected ColGroupConst() {
 	 * @param colIndices The Colum indexes for the column group.
 	 * @param dict       The dictionary containing one tuple for the entire compression.
 	 */
-	protected ColGroupConst(int[] colIndices, ADictionary dict) {
+	private ColGroupConst(int[] colIndices, ADictionary dict) {
 		super(colIndices);
 		this._dict = dict;
 	}

-	@Override
-	protected void computeRowSums(double[] c, boolean square, int rl, int ru) {
-		double vals = _dict.sumAllRowsToDouble(square, _colIndexes.length)[0];
-		for(int rix = rl; rix < ru; rix++)
-			c[rix] += vals;
+	/**
+	 * Creation method for ColGroupConst. This factory ensures that an Empty column group is constructed if the
+	 * dictionary input is null.
+	 *
+	 * @param colIndices The column indexes in the column group
+	 * @param dict       The dictionary to use
+	 * @return A column group, either Const or Empty.
+	 */
+	protected static AColGroup create(int[] colIndices, ADictionary dict) {
+		if(dict == null)
+			return new ColGroupEmpty(colIndices);
+		else
+			return new ColGroupConst(colIndices, dict);
 	}

 	@Override
 	protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) {
-		double value = _dict.aggregateTuples(builtin, _colIndexes.length)[0];
+		double value = _dict.aggregateRows(builtin, _colIndexes.length)[0];
 		for(int i = rl; i < ru; i++)
 			c[i] = builtin.execute(c[i], value);
 	}
@@ -108,19 +116,17 @@ public double getIdx(int r, int colIdx) {

 	@Override
 	public AColGroup scalarOperation(ScalarOperator op) {
-		return new ColGroupConst(_colIndexes, _dict.clone().inplaceScalarOp(op));
+		return create(_colIndexes, _dict.applyScalarOp(op));
 	}

 	@Override
 	public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) {
-		ADictionary ret = _dict.binOpLeft(op, v, _colIndexes);
-		return new ColGroupConst(_colIndexes, ret);
+		return create(_colIndexes, _dict.binOpLeft(op, v, _colIndexes));
 	}

 	@Override
 	public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) {
-		ADictionary ret = _dict.binOpRight(op, v, _colIndexes);
-		return new ColGroupConst(_colIndexes, ret);
+		return create(_colIndexes, _dict.binOpRight(op, v, _colIndexes));
 	}

 	/**
@@ -131,13 +137,12 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa
 	 */
 	public void addToCommon(double[] constV) {
 		final double[] values = _dict.getValues();
-		if(values != null && constV != null)
-			for(int i = 0; i < _colIndexes.length; i++)
-				constV[_colIndexes[i]] += values[i];
+		for(int i = 0; i < _colIndexes.length; i++)
+			constV[_colIndexes[i]] += values[i];
 	}

 	public double[] getValues() {
-		return _dict != null ? 
_dict.getValues() : null; + return _dict.getValues(); } @Override @@ -151,17 +156,38 @@ protected void computeColMxx(double[] c, Builtin builtin) { } @Override - protected void computeSum(double[] c, int nRows, boolean square) { - if(_dict != null) - if(square) - c[0] += _dict.sumsq(new int[] {nRows}, _colIndexes.length); - else - c[0] += _dict.sum(new int[] {nRows}, _colIndexes.length); + protected void computeSum(double[] c, int nRows) { + c[0] += _dict.sum(new int[] {nRows}, _colIndexes.length); + } + + @Override + public void computeColSums(double[] c, int nRows) { + _dict.colSum(c, new int[] {nRows}, _colIndexes); + } + + @Override + protected void computeSumSq(double[] c, int nRows) { + + c[0] += _dict.sumSq(new int[] {nRows}, _colIndexes.length); + } + + @Override + protected void computeColSumsSq(double[] c, int nRows) { + _dict.colSumSq(c, new int[] {nRows}, _colIndexes); } @Override - protected void computeColSums(double[] c, int nRows, boolean square) { - _dict.colSum(c, new int[] {nRows}, _colIndexes, square); + protected void computeRowSums(double[] c, int rl, int ru) { + double vals = _dict.sumAllRowsToDouble(_colIndexes.length)[0]; + for(int rix = rl; rix < ru; rix++) + c[rix] += vals; + } + + @Override + protected void computeRowSumsSq(double[] c, int rl, int ru) { + double vals = _dict.sumAllRowsToDoubleSq(_colIndexes.length)[0]; + for(int rix = rl; rix < ru; rix++) + c[rix] += vals; } @Override @@ -183,11 +209,13 @@ public AColGroup rightMultByMatrix(MatrixBlock right) { final int cr = right.getNumColumns(); if(_colIndexes.length == rr) { MatrixBlock left = forceValuesToMatrixBlock(); + if(left.isEmpty()) + return null; MatrixBlock ret = new MatrixBlock(1, cr, false); LibMatrixMult.matrixMult(left, right, ret); - ADictionary d = new MatrixBlockDictionary(ret); if(ret.isEmpty()) return null; + ADictionary d = new MatrixBlockDictionary(ret); return ColGroupFactory.genColGroupConst(cr, d); } else { @@ -202,7 +230,7 @@ public void tsmm(double[] result, int numColumns, int nRows) { @Override public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) { - throw new NotImplementedException(); + throw new DMLCompressionException("Should not be called"); } @Override @@ -223,19 +251,19 @@ protected AColGroup sliceSingleColumn(int idx) { return new ColGroupEmpty(colIndexes); else { ADictionary retD = new Dictionary(new double[] {_dict.getValue(idx)}); - return new ColGroupConst(colIndexes, retD); + return create(colIndexes, retD); } } @Override protected AColGroup sliceMultiColumns(int idStart, int idEnd, int[] outputCols) { ADictionary retD = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.length); - return new ColGroupConst(outputCols, retD); + return create(outputCols, retD); } @Override public AColGroup copy() { - return new ColGroupConst(_colIndexes, _dict.clone()); + return create(_colIndexes, _dict.clone()); } @Override @@ -251,7 +279,7 @@ public long getNumberNonZeros(int nRows) { @Override public AColGroup replace(double pattern, double replace) { ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.length); - return new ColGroupConst(_colIndexes, replaced); + return create(_colIndexes, replaced); } @Override @@ -269,9 +297,7 @@ public void write(DataOutput out) throws IOException { @Override public long getExactSizeOnDisk() { long ret = super.getExactSizeOnDisk(); - if(_dict != null) - ret += _dict.getExactSizeOnDisk(); - + ret += _dict.getExactSizeOnDisk(); return ret; } diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index b6d42312b98..82faecde164 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -67,26 +67,13 @@ public CompressionType getCompType() { protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { throw new NotImplementedException(); - // for(int i = rl; i < ru; i++, offT++) { - // final int rowIndex = _data.getIndex(i); - // if(sb.isEmpty(rowIndex)) - // continue; - // final double[] c = db.values(offT); - // final int off = db.pos(offT); - // final int apos = sb.pos(rowIndex); - // final int alen = sb.size(rowIndex) + apos; - // final double[] avals = sb.values(rowIndex); - // final int[] aix = sb.indexes(rowIndex); - // for(int j = apos; j < alen; j++) - // c[off + _colIndexes[aix[j]]] += avals[j]; - // } } @Override protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { final int nCol = _colIndexes.length; - for(int i = rl,offT = rl + offR; i < ru; i++, offT++) { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { final double[] c = db.values(offT); final int off = db.pos(offT) + offC; final int rowIndex = _data.getIndex(i) * nCol; @@ -118,8 +105,15 @@ public double getIdx(int r, int colIdx) { } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { - double[] vals = _dict.sumAllRowsToDouble(square, _colIndexes.length); + protected void computeRowSums(double[] c, int rl, int ru) { + double[] vals = _dict.sumAllRowsToDouble(_colIndexes.length); + for(int rix = rl; rix < ru; rix++) + c[rix] += vals[_data.getIndex(rix)]; + } + + @Override + protected void computeRowSumsSq(double[] c, int rl, int ru) { + double[] vals = _dict.sumAllRowsToDoubleSq(_colIndexes.length); for(int rix = rl; rix < ru; rix++) c[rix] += vals[_data.getIndex(rix)]; } @@ -127,7 +121,7 @@ protected void computeRowSums(double[] c, boolean square, int rl, int ru) { @Override protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { final int nCol = getNumCols(); - double[] preAggregatedRows = _dict.aggregateTuples(builtin, nCol); + double[] preAggregatedRows = _dict.aggregateRows(builtin, nCol); for(int i = rl; i < ru; i++) c[i] = builtin.execute(c[i], preAggregatedRows[_data.getIndex(i)]); } @@ -151,7 +145,7 @@ public void preAggregate(final MatrixBlock m, final MatrixBlock preAgg, final in @Override public void preAggregateDense(MatrixBlock m, MatrixBlock preAgg, int rl, int ru, int cl, int cu) { - _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); + _data.preAggregateDense(m, preAgg.getDenseBlockValues(), rl, ru, cl, cu); } private void preAggregateSparse(SparseBlock sb, MatrixBlock preAgg, int rl, int ru) { @@ -181,11 +175,14 @@ public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { final AIterator itThat = that._indexes.getIterator(); final int nCol = that._colIndexes.length; - - while(itThat.hasNext()) { + final int finalOff = that._indexes.getOffsetToLast(); + while(true) { final int to = _data.getIndex(itThat.value()); - final int fr = that._data.getIndex(itThat.getDataIndexAndIncrement()); + final int fr = that._data.getIndex(itThat.getDataIndex()); 
that._dict.addToEntry(ret, fr, to, nCol); + if(itThat.value() == finalOff) + break; + itThat.next(); } } @@ -193,9 +190,12 @@ public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { final AIterator itThat = that._indexes.getIterator(); final int nCol = that._colIndexes.length; - while(itThat.hasNext()) { + final int finalOff = that._indexes.getOffsetToLast(); + while(true) { final int to = _data.getIndex(itThat.value()); that._dict.addToEntry(ret, 0, to, nCol); + if(itThat.value() == finalOff) + break; itThat.next(); } } @@ -219,7 +219,7 @@ public long estimateInMemorySize() { @Override public AColGroup scalarOperation(ScalarOperator op) { - return new ColGroupDDC(_colIndexes, _numRows, applyScalarOp(op), _data, getCachedCounts()); + return new ColGroupDDC(_colIndexes, _numRows, _dict.applyScalarOp(op), _data, getCachedCounts()); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java index ec20674c43c..a75f046eb84 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java @@ -19,6 +19,8 @@ package org.apache.sysds.runtime.compress.colgroup; +import java.util.Arrays; + import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; @@ -69,7 +71,7 @@ public void decompressToDenseBlock(DenseBlock target, int rl, int ru, int offR, } @Override - public void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC){ + public void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC) { // do nothing. 
} @@ -80,10 +82,12 @@ public double getIdx(int r, int colIdx) { @Override public AColGroup scalarOperation(ScalarOperator op) { - double val0 = op.executeScalar(0); - if(val0 == 0) + final double v = op.executeScalar(0); + if(v == 0) return this; - return new ColGroupConst(_colIndexes, new Dictionary(new double[_colIndexes.length]).inplaceScalarOp(op)); + double[] retV = new double[_colIndexes.length]; + Arrays.fill(retV, v); + return ColGroupConst.create(_colIndexes, new Dictionary(retV)); } @Override @@ -99,7 +103,7 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf if(allZero) return this; - return new ColGroupConst(_colIndexes, new Dictionary(retVals)); + return ColGroupConst.create(_colIndexes, new Dictionary(retVals)); } @Override @@ -111,10 +115,10 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa final int lenV = _colIndexes.length; boolean allZero = true; for(int i = 0; i < lenV; i++) - allZero = 0 == (retVals[i] = fn.execute(0, v[_colIndexes[i]])) && allZero ; + allZero = 0 == (retVals[i] = fn.execute(0, v[_colIndexes[i]])) && allZero; if(allZero) return this; - return new ColGroupConst(_colIndexes, new Dictionary(retVals)); + return ColGroupConst.create(_colIndexes, new Dictionary(retVals)); } @Override @@ -185,11 +189,6 @@ public final double getMax() { return 0; } - @Override - public void computeColSums(double[] c, int nRows) { - // do nothing - } - @Override protected double computeMxx(double c, Builtin builtin) { return builtin.execute(c, 0); @@ -202,17 +201,32 @@ protected void computeColMxx(double[] c, Builtin builtin) { } @Override - protected void computeSum(double[] c, int nRows, boolean square) { + protected void computeSum(double[] c, int nRows) { + // do nothing + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru) { + // do nothing + } + + @Override + public void computeColSums(double[] c, int nRows) { + // do nothing + } + + @Override + protected void computeSumSq(double[] c, int nRows) { // do nothing } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { + protected void computeRowSumsSq(double[] c, int rl, int ru) { // do nothing } @Override - protected void computeColSums(double[] c, int nRows, boolean square) { + protected void computeColSumsSq(double[] c, int nRows) { // do nothing } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index fc0edf67fae..72779342445 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -147,7 +147,7 @@ public static AColGroup genColGroupConst(int[] cols, double[] values) { if(cols.length != values.length) throw new DMLCompressionException("Invalid size of values compared to columns"); ADictionary dict = new Dictionary(values); - return new ColGroupConst(cols, dict); + return ColGroupConst.create(cols, dict); } /** @@ -162,7 +162,7 @@ public static AColGroup genColGroupConst(int numCols, ADictionary dict) { throw new DMLCompressionException( "Invalid construction of const column group with different number of columns in arguments"); final int[] colIndices = Util.genColsIndices(numCols); - return new ColGroupConst(colIndices, dict); + return ColGroupConst.create(colIndices, dict); } private static List genEmpty(MatrixBlock in, CompressionSettings compSettings) { @@ 
-194,7 +194,7 @@ private static List compressColGroupsParallel(MatrixBlock in, Compres if(!tg.isEmpty()) tasks.add(new CompressTask(in, tg, compSettings, Math.max(1, k / 2))); - List ret = new ArrayList<>(csi.getNumberColGroups()); + List ret = new ArrayList<>(); for(Future> t : pool.invokeAll(tasks)) ret.addAll(t.get()); pool.shutdown(); @@ -234,11 +234,17 @@ protected CompressTask(MatrixBlock in, List groups, @Override public Collection call() { - ArrayList res = new ArrayList<>(); - Tmp tmpMap = new Tmp(); - for(CompressedSizeInfoColGroup g : _groups) - res.addAll(compressColGroup(_in, _compSettings, tmpMap, g, _k)); - return res; + try{ + ArrayList res = new ArrayList<>(); + Tmp tmpMap = new Tmp(); + for(CompressedSizeInfoColGroup g : _groups) + res.addAll(compressColGroup(_in, _compSettings, tmpMap, g, _k)); + return res; + } + catch(Exception e){ + e.printStackTrace(); + throw e; + } } } @@ -347,7 +353,7 @@ private static AColGroup compress(int[] colIndexes, int rlen, ABitmap ubm, Compr final IntArrayList[] of = ubm.getOffsetList(); if(of.length == 1 && of[0].size() == rlen) // If this always constant - return new ColGroupConst(colIndexes, DictionaryFactory.create(ubm)); + return ColGroupConst.create(colIndexes, DictionaryFactory.create(ubm)); switch(compType) { case DDC: @@ -490,7 +496,7 @@ private static AColGroup compressSDC(int[] colIndexes, int rlen, ABitmap ubm, Co ADictionary dict = DictionaryFactory.create(ubm, tupleSparsity); if(ubm.getNumValues() == 1) { if(numZeros >= largestOffset) { - final AOffset off = OffsetFactory.create(ubm.getOffsetList()[0].extractValues(true)); + final AOffset off = OffsetFactory.createOffset(ubm.getOffsetList()[0].extractValues(true)); return new ColGroupSDCSingleZeros(colIndexes, rlen, dict, off, null); } else { @@ -510,7 +516,7 @@ private static AColGroup setupMultiValueZeroColGroup(int[] colIndexes, int rlen, CompressionSettings cs) { IntArrayList[] offsets = ubm.getOffsetList(); AInsertionSorter s = InsertionSorterFactory.create(rlen, offsets, cs.sdcSortType); - AOffset indexes = OffsetFactory.create(s.getIndexes()); + AOffset indexes = OffsetFactory.createOffset(s.getIndexes()); AMapToData data = s.getData(); int[] counts = new int[offsets.length + 1]; int sum = 0; @@ -519,18 +525,16 @@ private static AColGroup setupMultiValueZeroColGroup(int[] colIndexes, int rlen, sum += counts[i]; } counts[offsets.length] = rlen - sum; - AColGroupValue ret = new ColGroupSDCZeros(colIndexes, rlen, dict, indexes, data, counts); - return ret; + return ColGroupSDCZeros.create(colIndexes, rlen, dict, indexes, data, counts); } private static AColGroup setupMultiValueColGroup(int[] colIndexes, int numZeros, int rlen, ABitmap ubm, int largestIndex, ADictionary dict, CompressionSettings cs) { IntArrayList[] offsets = ubm.getOffsetList(); AInsertionSorter s = InsertionSorterFactory.createNegative(rlen, offsets, largestIndex, cs.sdcSortType); - AOffset indexes = OffsetFactory.create(s.getIndexes()); + AOffset indexes = OffsetFactory.createOffset(s.getIndexes()); AMapToData _data = s.getData(); - AColGroupValue ret = new ColGroupSDC(colIndexes, rlen, dict, indexes, _data, null); - return ret; + return ColGroupSDC.create(colIndexes, rlen, dict, indexes, _data, null); } private static AColGroup setupSingleValueSDCColGroup(int[] colIndexes, int rlen, ABitmap ubm, ADictionary dict) { @@ -548,7 +552,7 @@ private static AColGroup setupSingleValueSDCColGroup(int[] colIndexes, int rlen, while(v < rlen) indexes[p++] = v++; - AOffset off = OffsetFactory.create(indexes); + 
AOffset off = OffsetFactory.createOffset(indexes); return new ColGroupSDCSingle(colIndexes, rlen, dict, off, null); } @@ -635,14 +639,14 @@ private static AColGroup compressSDCFromSparseTransposedBlock(MatrixBlock mb, in } counts[entries.size()] = rlen - sum; - final AOffset offsets = OffsetFactory.create(sb.indexes(sbRow), apos, alen); + final AOffset offsets = OffsetFactory.createOffset(sb.indexes(sbRow), apos, alen); if(entries.size() <= 1) return new ColGroupSDCSingleZeros(cols, rlen, new Dictionary(dict), offsets, counts); else { final AMapToData mapToData = MapToFactory.create((alen - apos), entries.size()); for(int j = apos; j < alen; j++) mapToData.set(j - apos, map.get(vals[j])); - return new ColGroupSDCZeros(cols, rlen, new Dictionary(dict), offsets, mapToData, counts); + return ColGroupSDCZeros.create(cols, rlen, new Dictionary(dict), offsets, mapToData, counts); } } else { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java index f8edbcb1975..184ca1a69c2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java @@ -118,6 +118,8 @@ private static AColGroup constructColGroup(ColGroupType ctype, int nRows){ return new ColGroupSDCSingleZeros(nRows); case SDCZeros: return new ColGroupSDCZeros(nRows); + case PFOR: + return new ColGroupPFOR(nRows); default: throw new DMLRuntimeException("Unsupported ColGroup Type used: " + ctype); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java index a303d98910c..64dd626bc17 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java @@ -23,7 +23,6 @@ import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.CompressionSettings; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; @@ -66,7 +65,8 @@ public ColGroupType getColGroupType() { } @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { throw new NotImplementedException(); // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; // final int numCols = getNumCols(); @@ -79,33 +79,34 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int // double[] c = target.getDenseBlockValues(); // // cache conscious append via horizontal scans // for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz) { - // for(int k = 0, off = 0; k < numVals; k++, off += numCols) { - // int boff = _ptr[k]; - // int blen = len(k); - // int bix = apos[k]; - - // if(bix >= blen) - // continue; - // int pos = boff + bix; - // int len = _data[pos]; - // int i = 1; - // int row = bi + _data[pos + 1]; - // while(i <= len && row < rl) - // row = bi + _data[pos + i++]; - - // for(; i <= len && row < ru; i++) { - // row = bi + _data[pos + i]; - // int rc = (row - offOut) * targetCols; - // for(int j = 0; j < numCols; j++) - // c[rc + 
_colIndexes[j]] += values[off + j]; - // } - // apos[k] += len + 1; - // } + // for(int k = 0, off = 0; k < numVals; k++, off += numCols) { + // int boff = _ptr[k]; + // int blen = len(k); + // int bix = apos[k]; + + // if(bix >= blen) + // continue; + // int pos = boff + bix; + // int len = _data[pos]; + // int i = 1; + // int row = bi + _data[pos + 1]; + // while(i <= len && row < rl) + // row = bi + _data[pos + i++]; + + // for(; i <= len && row < ru; i++) { + // row = bi + _data[pos + i]; + // int rc = (row - offOut) * targetCols; + // for(int j = 0; j < numCols; j++) + // c[rc + _colIndexes[j]] += values[off + j]; + // } + // apos[k] += len + 1; + // } // } } @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock values) { + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock values) { throw new NotImplementedException(); } @@ -148,7 +149,7 @@ public AColGroup scalarOperation(ScalarOperator op) { // fast path: sparse-safe operations // Note that bitmaps don't change and are shallow-copied if(op.sparseSafe || val0 == 0 || !_zeros) { - return new ColGroupOLE(_colIndexes, _numRows, _zeros, applyScalarOp(op), _data, _ptr, getCachedCounts()); + return new ColGroupOLE(_colIndexes, _numRows, _zeros, _dict.applyScalarOp(op), _data, _ptr, getCachedCounts()); } // slow path: sparse-unsafe operations (potentially create new bitmap) // note: for efficiency, we currently don't drop values that become 0 @@ -156,10 +157,10 @@ public AColGroup scalarOperation(ScalarOperator op) { int[] loff = computeOffsets(lind); if(loff.length == 0) { // empty offset list: go back to fast path - return new ColGroupOLE(_colIndexes, _numRows, false, applyScalarOp(op), _data, _ptr, getCachedCounts()); + return new ColGroupOLE(_colIndexes, _numRows, false, _dict.applyScalarOp(op), _data, _ptr, getCachedCounts()); } - ADictionary rvalues = applyScalarOp(op, val0, getNumCols()); + ADictionary rvalues = _dict.applyScalarOp(op, val0, getNumCols()); char[] lbitmap = genOffsetBitmap(loff, loff.length); char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length); System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); @@ -216,69 +217,74 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa // } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { + protected void computeRowSums(double[] c, int rl, int ru) { + throw new NotImplementedException(); + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // final int numVals = getNumValues(); - final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - final int numVals = getNumValues(); + // if(numVals > 1 && _numRows > blksz) { + // final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ; - if(numVals > 1 && _numRows > blksz) { - final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ; - - // step 1: prepare position and value arrays - int[] apos = skipScan(numVals, rl); - double[] aval = _dict.sumAllRowsToDouble(square, _colIndexes.length); - - // step 2: cache conscious row sums via horizontal scans - for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz2) { - int bimax = Math.min(bi + blksz2, ru); - - // horizontal segment scan, incl pos maintenance - for(int k = 0; k < numVals; k++) { - int boff = _ptr[k]; - int blen = len(k); - double val = aval[k]; - int bix = apos[k]; - - for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { - // prepare 
length, start, and end pos - int len = _data[boff + bix]; - - // compute partial results - for(int i = 1; i <= len; i++) { - int rix = ii + _data[boff + bix + i]; - if(rix >= _numRows) - throw new DMLCompressionException("Invalid row " + rix); - c[rix] += val; - } - bix += len + 1; - } + // // step 1: prepare position and value arrays + // int[] apos = skipScan(numVals, rl); + // double[] aval = _dict.sumAllRowsToDouble(square, _colIndexes.length); + + // // step 2: cache conscious row sums via horizontal scans + // for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz2) { + // int bimax = Math.min(bi + blksz2, ru); + + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = aval[k]; + // int bix = apos[k]; + + // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { + // // prepare length, start, and end pos + // int len = _data[boff + bix]; + + // // compute partial results + // for(int i = 1; i <= len; i++) { + // int rix = ii + _data[boff + bix + i]; + // if(rix >= _numRows) + // throw new DMLCompressionException("Invalid row " + rix); + // c[rix] += val; + // } + // bix += len + 1; + // } - apos[k] = bix; - } - } - } - else { - // iterate over all values and their bitmaps - for(int k = 0; k < numVals; k++) { - // prepare value-to-add for entire value bitmap - int boff = _ptr[k]; - int blen = len(k); - double val = _dict.sumRow(k, square, _colIndexes.length); + // apos[k] = bix; + // } + // } + // } + // else { + // // iterate over all values and their bitmaps + // for(int k = 0; k < numVals; k++) { + // // prepare value-to-add for entire value bitmap + // int boff = _ptr[k]; + // int blen = len(k); + // double val = _dict.sumRow(k, square, _colIndexes.length); + + // // iterate over bitmap blocks and add values + // if(val != 0) { + // int slen; + // int bix = skipScanVal(k, rl); + // for(int off = ((rl + 1) / blksz) * blksz; bix < blen && off < ru; bix += slen + 1, off += blksz) { + // slen = _data[boff + bix]; + // for(int i = 1; i <= slen; i++) { + // int rix = off + _data[boff + bix + i]; + // c[rix] += val; + // } + // } + // } + // } + // } + } - // iterate over bitmap blocks and add values - if(val != 0) { - int slen; - int bix = skipScanVal(k, rl); - for(int off = ((rl + 1) / blksz) * blksz; bix < blen && off < ru; bix += slen + 1, off += blksz) { - slen = _data[boff + bix]; - for(int i = 1; i <= slen; i++) { - int rix = off + _data[boff + bix + i]; - c[rix] += val; - } - } - } - } - } + @Override + protected void computeRowSumsSq(double[] c, int rl, int ru) { + throw new NotImplementedException(); } @Override @@ -413,7 +419,7 @@ else if(_data[boff + bix + blckIx] > offset) private int[] skipScan(int numVals, int rl) { final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; rl = (rl / blksz) * blksz; - int[] ret = allocIVector(numVals, rl == 0); + int[] ret = new int[numVals]; if(rl > 0) { // rl aligned with blksz for(int k = 0; k < numVals; k++) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPFOR.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPFOR.java new file mode 100644 index 00000000000..e858addbc27 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPFOR.java @@ -0,0 +1,386 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
+import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
+import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
+import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.Divide;
+import org.apache.sysds.runtime.functionobjects.Minus;
+import org.apache.sysds.runtime.functionobjects.Multiply;
+import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * ColGroup for Patched Frame Of Reference (PFOR).
+ *
+ * This column group fits naturally into the existing collection of compression groups.
+ *
+ * It can be constructed when an SDCZeros group gets a non-zero default value: the natural extension is to transform
+ * the group into a PFOR group, since the default value is then treated as a reference offset, and the dictionary can
+ * be copied with no modifications.
+ */
+public class ColGroupPFOR extends AMorphingMMColGroup {
+
+	private static final long serialVersionUID = 3883228464052204203L;
+
+	/** Sparse row indexes for the rows that hold values from the dictionary */
+	protected AOffset _indexes;
+
+	/** Mapping from each offset to its row in the dictionary. 
*/ + protected transient AMapToData _data; + + /** Reference values in this column group */ + protected double[] _reference; + + /** + * Constructor for serialization + * + * @param numRows Number of rows contained + */ + protected ColGroupPFOR(int numRows) { + super(numRows); + } + + private ColGroupPFOR(int[] colIndices, int numRows, ADictionary dict, AOffset indexes, AMapToData data, + int[] cachedCounts, double[] reference) { + super(colIndices, numRows, dict, cachedCounts); + _data = data; + _indexes = indexes; + _zeros = allZero(reference); + _reference = reference; + } + + protected static AColGroup create(int[] colIndices, int numRows, ADictionary dict, AOffset indexes, AMapToData data, + int[] cachedCounts, double[] reference) { + if(dict == null) { + // either ColGroupEmpty or const + boolean allZero = true; + for(double d : reference) + if(d != 0) { + allZero = false; + break; + } + + if(allZero) + return new ColGroupEmpty(colIndices); + else + return ColGroupFactory.genColGroupConst(colIndices, reference); + } + return new ColGroupPFOR(colIndices, numRows, dict, indexes, data, cachedCounts, reference); + } + + private final static boolean allZero(double[] in) { + for(double v : in) + if(v != 0) + return false; + return true; + } + + @Override + public CompressionType getCompType() { + return CompressionType.PFOR; + } + + @Override + public ColGroupType getColGroupType() { + return ColGroupType.PFOR; + } + + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts, _numRows); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru) { + // Add reference value sum. + final double refSum = refSum(); + for(int rix = rl; rix < ru; rix++) + c[rix] += refSum; + + final double[] vals = _dict.sumAllRowsToDouble(_colIndexes.length); + ColGroupSDCZeros.computeRowSums(c, rl, ru, vals, _data, _indexes, _numRows); + } + + private final double refSum() { + double ret = 0; + for(double d : _reference) + ret += d; + return ret; + } + + @Override + protected void computeRowSumsSq(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDoubleSq(_reference); + ColGroupSDC.computeRowSumsSq(c, rl, ru, vals, _data, _indexes, _numRows); + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { + final double[] vals = _dict.aggregateRows(builtin, _reference); + ColGroupSDC.computeRowMxx(c, builtin, rl, ru, vals, _data, _indexes, _numRows, vals[vals.length - 1]); + } + + @Override + public double getIdx(int r, int colIdx) { + final AIterator it = _indexes.getIterator(r); + final int nCol = _colIndexes.length; + if(it.value() == r) { + final int rowOff = _data.getIndex(it.getDataIndex()) * nCol; + return _dict.getValue(rowOff + colIdx) + _reference[colIdx]; + } + else + return _reference[colIdx]; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + final double[] newRef = new double[_reference.length]; + for(int i = 0; i < _reference.length; i++) + newRef[i] = op.executeScalar(_reference[i]); + if(op.fn instanceof Plus || op.fn instanceof Minus) { + return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); + } + else if(op.fn instanceof Multiply || op.fn instanceof Divide) { + final ADictionary newDict = _dict.applyScalarOp(op); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); + } + else { + final ADictionary newDict = _dict.applyScalarOp(op, _reference, newRef); + return create(_colIndexes, _numRows, newDict, 
_indexes, _data, getCachedCounts(), newRef); + } + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + final double[] newRef = new double[_reference.length]; + for(int i = 0; i < _reference.length; i++) + newRef[i] = op.fn.execute(v[_colIndexes[i]], _reference[i]); + + if(op.fn instanceof Plus || op.fn instanceof Minus) + return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); + else if(op.fn instanceof Multiply || op.fn instanceof Divide) { + final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); + } + else { + final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes, _reference, newRef); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); + } + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + final double[] newRef = new double[_reference.length]; + for(int i = 0; i < _reference.length; i++) + newRef[i] = op.fn.execute(_reference[i], v[_colIndexes[i]]); + if(op.fn instanceof Plus || op.fn instanceof Minus) + return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); + else if(op.fn instanceof Multiply || op.fn instanceof Divide) { + final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); + } + else { + final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes, _reference, newRef); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); + } + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + _indexes.write(out); + _data.write(out); + for(double d : _reference) + out.writeDouble(d); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + _indexes = OffsetFactory.readIn(in); + _data = MapToFactory.readIn(in); + _reference = new double[_colIndexes.length]; + for(int i = 0; i < _colIndexes.length; i++) + _reference[i] = in.readDouble(); + } + + @Override + public long getExactSizeOnDisk() { + long ret = super.getExactSizeOnDisk(); + ret += _data.getExactSizeOnDisk(); + ret += _indexes.getExactSizeOnDisk(); + ret += 8 * _colIndexes.length; // reference values. 
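+ // Keep in sync with write()/readFields(): offsets and mapping data, plus one 8-byte double per reference column.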
+ return ret; + } + + @Override + public AColGroup replace(double pattern, double replace) { + boolean patternInReference = false; + for(double d : _reference) + if(pattern == d) { + patternInReference = true; + break; + } + + if(patternInReference) { + throw new NotImplementedException("replace is not implemented for patterns that occur in the reference values"); + // _dict.replace(pattern, replace, _reference, _newReplace); + } + else { + final ADictionary newDict = _dict.replace(pattern, replace, _reference); + return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), _reference); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append(String.format("\n%15s ", "Indexes: ")); + sb.append(_indexes.toString()); + sb.append(String.format("\n%15s ", "Data: ")); + sb.append(_data); + sb.append(String.format("\n%15s ", "Reference:")); + sb.append(Arrays.toString(_reference)); + return sb.toString(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin, _reference); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes, _reference); + } + + @Override + protected void computeSum(double[] c, int nRows) { + super.computeSum(c, nRows); + final double refSum = refSum(); + c[0] += refSum * nRows; + } + + @Override + public void computeColSums(double[] c, int nRows) { + super.computeColSums(c, nRows); + for(int i = 0; i < _colIndexes.length; i++) + c[_colIndexes[i]] += _reference[i] * nRows; + } + + @Override + protected void computeSumSq(double[] c, int nRows) { + c[0] += _dict.sumSq(getCounts(), _reference); + } + + @Override + protected void computeColSumsSq(double[] c, int nRows) { + _dict.colSumSq(c, getCounts(), _colIndexes, _reference); + } + + @Override + protected void computeProduct(double[] c, int nRows) { + throw new NotImplementedException("Not Implemented PFOR"); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru) { + throw new NotImplementedException("Not Implemented PFOR"); + } + + @Override + protected void computeColProduct(double[] c, int nRows) { + throw new NotImplementedException("Not Implemented PFOR"); + } + + @Override + protected AColGroup sliceSingleColumn(int idx) { + ColGroupPFOR ret = (ColGroupPFOR) super.sliceSingleColumn(idx); + // select values from double array. 
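+ // Note: super.sliceSingleColumn is assumed to slice the dictionary and keep the shared offset and mapping structures, so only the single matching reference entry needs to be carried over here.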
+ ret._reference = new double[1]; + ret._reference[0] = _reference[idx]; + return ret; + } + + @Override + protected AColGroup sliceMultiColumns(int idStart, int idEnd, int[] outputCols) { + ColGroupPFOR ret = (ColGroupPFOR) super.sliceMultiColumns(idStart, idEnd, outputCols); + final int len = idEnd - idStart; + ret._reference = new double[len]; + for(int i = 0, ii = idStart; i < len; i++, ii++) + ret._reference[i] = _reference[ii]; + + return ret; + } + + @Override + public boolean containsValue(double pattern) { + if(pattern == 0 && _zeros) + return true; + else if(Double.isNaN(pattern) || Double.isInfinite(pattern)) + return containsInfOrNan(pattern) || _dict.containsValue(pattern); + else + return _dict.containsValue(pattern, _reference); + } + + private boolean containsInfOrNan(double pattern) { + if(Double.isNaN(pattern)) { + for(double d : _reference) + if(Double.isNaN(d)) + return true; + return false; + } + else { + for(double d : _reference) + if(Double.isInfinite(d)) + return true; + return false; + } + } + + @Override + public long getNumberNonZeros(int nRows) { + int[] counts = getCounts(); + return (long) _dict.getNumberNonZeros(counts, _reference, nRows); + } + + @Override + public AColGroup extractCommon(double[] constV) { + for(int i = 0; i < _colIndexes.length; i++) + constV[_colIndexes[i]] += _reference[i]; + return ColGroupSDCZeros.create(_colIndexes, _numRows, _dict, _indexes, _data, getCounts()); + } +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java index 3d69b9662aa..3ee843468ec 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java @@ -24,7 +24,6 @@ import java.util.List; import org.apache.commons.lang.NotImplementedException; -import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; @@ -146,7 +145,7 @@ public AColGroup scalarOperation(ScalarOperator op) { // fast path: sparse-safe operations // Note that bitmaps don't change and are shallow-copied if(op.sparseSafe || val0 == 0 || !_zeros) { - return new ColGroupRLE(_colIndexes, _numRows, _zeros, applyScalarOp(op), _data, _ptr, getCachedCounts()); + return new ColGroupRLE(_colIndexes, _numRows, _zeros, _dict.applyScalarOp(op), _data, _ptr, getCachedCounts()); } // slow path: sparse-unsafe operations (potentially create new bitmap) @@ -154,10 +153,10 @@ public AColGroup scalarOperation(ScalarOperator op) { boolean[] lind = computeZeroIndicatorVector(); int[] loff = computeOffsets(lind); if(loff.length == 0) { // empty offset list: go back to fast path - return new ColGroupRLE(_colIndexes, _numRows, false, applyScalarOp(op), _data, _ptr, getCachedCounts()); + return new ColGroupRLE(_colIndexes, _numRows, false, _dict.applyScalarOp(op), _data, _ptr, getCachedCounts()); } - ADictionary rvalues = applyScalarOp(op, val0, getNumCols()); + ADictionary rvalues = _dict.applyScalarOp(op, val0, getNumCols()); char[] lbitmap = genRLEBitmap(loff, loff.length); char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length); @@ -217,73 +216,143 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa // } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { + 
protected void computeRowSums(double[] c, int rl, int ru) { + throw new NotImplementedException(); + // final int numVals = getNumValues(); - final int numVals = getNumValues(); + // if(numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + + // // step 1: prepare position and value arrays + + // // current pos / values per RLE list + // int[] astart = new int[numVals]; + // int[] apos = skipScan(numVals, rl, astart); + // double[] aval = _dict.sumAllRowsToDouble(square, _colIndexes.length); + + // // step 2: cache conscious matrix-vector via horizontal scans + // for(int bi = rl; bi < ru; bi += blksz) { + // int bimax = Math.min(bi + blksz, ru); + + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = aval[k]; + // int bix = apos[k]; + // int start = astart[k]; + + // // compute partial results, not aligned + // while(bix < blen) { + // int lstart = _data[boff + bix]; + // int llen = _data[boff + bix + 1]; + // int from = Math.max(bi, start + lstart); + // int to = Math.min(start + lstart + llen, bimax); + // for(int rix = from; rix < to; rix++) + // c[rix] += val; + + // if(start + lstart + llen >= bimax) + // break; + // start += lstart + llen; + // bix += 2; + // } - if(numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { - final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - - // step 1: prepare position and value arrays - - // current pos / values per RLE list - int[] astart = new int[numVals]; - int[] apos = skipScan(numVals, rl, astart); - double[] aval = _dict.sumAllRowsToDouble(square, _colIndexes.length); - - // step 2: cache conscious matrix-vector via horizontal scans - for(int bi = rl; bi < ru; bi += blksz) { - int bimax = Math.min(bi + blksz, ru); - - // horizontal segment scan, incl pos maintenance - for(int k = 0; k < numVals; k++) { - int boff = _ptr[k]; - int blen = len(k); - double val = aval[k]; - int bix = apos[k]; - int start = astart[k]; - - // compute partial results, not aligned - while(bix < blen) { - int lstart = _data[boff + bix]; - int llen = _data[boff + bix + 1]; - int from = Math.max(bi, start + lstart); - int to = Math.min(start + lstart + llen, bimax); - for(int rix = from; rix < to; rix++) - c[rix] += val; - - if(start + lstart + llen >= bimax) - break; - start += lstart + llen; - bix += 2; - } - - apos[k] = bix; - astart[k] = start; - } - } - } - else { - for(int k = 0; k < numVals; k++) { - int boff = _ptr[k]; - int blen = len(k); - double val = _dict.sumRow(k, square, _colIndexes.length); - - if(val != 0.0) { - Pair tmp = skipScanVal(k, rl); - int bix = tmp.getKey(); - int curRunStartOff = tmp.getValue(); - int curRunEnd = tmp.getValue(); - for(; bix < blen && curRunEnd < ru; bix += 2) { - curRunStartOff = curRunEnd + _data[boff + bix]; - curRunEnd = curRunStartOff + _data[boff + bix + 1]; - for(int rix = curRunStartOff; rix < curRunEnd && rix < ru; rix++) - c[rix] += val; - - } - } - } - } + // apos[k] = bix; + // astart[k] = start; + // } + // } + // } + // else { + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = _dict.sumRow(k, square, _colIndexes.length); + + // if(val != 0.0) { + // Pair tmp = skipScanVal(k, rl); + // int bix = tmp.getKey(); + // int curRunStartOff = tmp.getValue(); + // int curRunEnd = tmp.getValue(); + // for(; bix < blen && curRunEnd < ru; bix += 2) { + // curRunStartOff = 
curRunEnd + _data[boff + bix]; + // curRunEnd = curRunStartOff + _data[boff + bix + 1]; + // for(int rix = curRunStartOff; rix < curRunEnd && rix < ru; rix++) + // c[rix] += val; + + // } + // } + // } + // } + } + + @Override + protected void computeRowSumsSq(double[] c, int rl, int ru) { + throw new NotImplementedException(); + // final int numVals = getNumValues(); + + // if(numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + + // // step 1: prepare position and value arrays + + // // current pos / values per RLE list + // int[] astart = new int[numVals]; + // int[] apos = skipScan(numVals, rl, astart); + // double[] aval = _dict.sumAllRowsToDouble(square, _colIndexes.length); + + // // step 2: cache conscious matrix-vector via horizontal scans + // for(int bi = rl; bi < ru; bi += blksz) { + // int bimax = Math.min(bi + blksz, ru); + + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = aval[k]; + // int bix = apos[k]; + // int start = astart[k]; + + // // compute partial results, not aligned + // while(bix < blen) { + // int lstart = _data[boff + bix]; + // int llen = _data[boff + bix + 1]; + // int from = Math.max(bi, start + lstart); + // int to = Math.min(start + lstart + llen, bimax); + // for(int rix = from; rix < to; rix++) + // c[rix] += val; + + // if(start + lstart + llen >= bimax) + // break; + // start += lstart + llen; + // bix += 2; + // } + + // apos[k] = bix; + // astart[k] = start; + // } + // } + // } + // else { + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = _dict.sumRow(k, square, _colIndexes.length); + + // if(val != 0.0) { + // Pair tmp = skipScanVal(k, rl); + // int bix = tmp.getKey(); + // int curRunStartOff = tmp.getValue(); + // int curRunEnd = tmp.getValue(); + // for(; bix < blen && curRunEnd < ru; bix += 2) { + // curRunStartOff = curRunEnd + _data[boff + bix]; + // curRunEnd = curRunStartOff + _data[boff + bix + 1]; + // for(int rix = curRunStartOff; rix < curRunEnd && rix < ru; rix++) + // c[rix] += val; + + // } + // } + // } + // } } @Override @@ -395,7 +464,7 @@ public double getIdx(int r, int colIdx) { * @return array of positions for all values */ private int[] skipScan(int numVals, int rl, int[] astart) { - int[] apos = allocIVector(numVals, rl == 0); + int[] apos = new int[numVals]; if(rl > 0) { // rl aligned with blksz for(int k = 0; k < numVals; k++) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java index fc011e082a1..9aef4313406 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java @@ -23,18 +23,13 @@ import java.io.DataOutput; import java.io.IOException; -import org.apache.commons.lang.NotImplementedException; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import 
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; -import org.apache.sysds.runtime.data.DenseBlock; -import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; -import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.BinaryOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -46,7 +41,7 @@ * This column group is handy in cases where sparse unsafe operations is executed on very sparse columns. Then the zeros * would be materialized in the group without any overhead. */ -public class ColGroupSDC extends AColGroupValue { +public class ColGroupSDC extends AMorphingMMColGroup { private static final long serialVersionUID = 769993538831949086L; /** * Sparse row indexes for the data @@ -66,7 +61,7 @@ protected ColGroupSDC(int numRows) { super(numRows); } - protected ColGroupSDC(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + private ColGroupSDC(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, int[] cachedCounts) { super(colIndices, numRows, dict, cachedCounts); _indexes = offsets; @@ -74,6 +69,14 @@ protected ColGroupSDC(int[] colIndices, int numRows, ADictionary dict, AOffset o _zeros = false; } + protected static AColGroup create(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + int[] cachedCounts) { + if(dict == null) + return new ColGroupEmpty(colIndices); + else + return new ColGroupSDC(colIndices, numRows, dict, offsets, data, cachedCounts); + } + @Override public CompressionType getCompType() { return CompressionType.SDC; @@ -85,183 +88,153 @@ public ColGroupType getColGroupType() { } @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { + public double getIdx(int r, int colIdx) { + final AIterator it = _indexes.getIterator(r); final int nCol = _colIndexes.length; - final int offsetToDefault = values.length - nCol; - final AIterator it = _indexes.getIterator(rl); - - int offT = rl + offR; - int i = rl; - for(; i < ru && it.hasNext(); i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - if(it.value() == i) { - int offset = _data.getIndex(it.getDataIndexAndIncrement()) * nCol; - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offset + j]; - } - else - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offsetToDefault + j]; - } - - for(; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offsetToDefault + j]; - } - - _indexes.cacheIterator(it, ru); - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - throw new NotImplementedException(); - // final int offsetToDefault = sb.numRows() - 1; - // final int defApos = sb.pos(offsetToDefault); - // final int defAlen = sb.size(offsetToDefault) + defApos; - // final double[] defAvals = sb.values(offsetToDefault); - // final int[] defAix = sb.indexes(offsetToDefault); - // final DenseBlock db = target.getDenseBlock(); - - // int i = rl; - // AIterator it = _indexes.getIterator(rl); - // for(; i < ru && it.hasNext(); i++, offT++) { - // final double[] c = db.values(offT); - // final int off = db.pos(offT); - // if(it.value() == i) { - // int dictIndex = 
_data.getIndex(it.getDataIndexAndIncrement()); - // if(sb.isEmpty(dictIndex)) - // continue; - // final int apos = sb.pos(dictIndex); - // final int alen = sb.size(dictIndex) + apos; - // final double[] avals = sb.values(dictIndex); - // final int[] aix = sb.indexes(dictIndex); - // for(int j = apos; j < alen; j++) - // c[off + _colIndexes[aix[j]]] += avals[j]; - // } - // else - // for(int j = defApos; j < defAlen; j++) - // c[off + _colIndexes[defAix[j]]] += defAvals[j]; - // } - - // for(; i < ru; i++, offT++) { - // final double[] c = db.values(offT); - // final int off = db.pos(offT); - // for(int j = defApos; j < defAlen; j++) - // c[off + _colIndexes[defAix[j]]] += defAvals[j]; - // } - - // _indexes.cacheIterator(it, ru); + final int rowOff = it.value() == r ? _data.getIndex(it.getDataIndex()) * nCol : getNumValues() * nCol - nCol; + return _dict.getValue(rowOff + colIdx); } @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - throw new NotImplementedException(); - } + protected void computeRowSums(double[] c, int rl, int ru) { - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - final int nCol = _colIndexes.length; - final int offsetToDefault = values.length - nCol; final AIterator it = _indexes.getIterator(rl); - - int offT = rl + offR; - int i = rl; - for(; i < ru && it.hasNext(); i++, offT++) { - // final double[] c = db.values(offT); - // final int off = db.pos(offT) + offC; - if(it.value() == i) { - int offset = _data.getIndex(it.getDataIndexAndIncrement()) * nCol; - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[offset + j]); - // c[off + _colIndexes[j]] += values[offset + j]; + final int numVals = getNumValues(); + int r = rl; + final double[] vals = _dict.sumAllRowsToDouble(_colIndexes.length); + final double def = vals[numVals - 1]; + if(it != null && it.value() > ru) + _indexes.cacheIterator(it, ru); + else if(it != null && ru >= _indexes.getOffsetToLast()) { + final int maxId = _data.size() - 1; + while(true) { + if(it.value() == r) { + c[r] += vals[_data.getIndex(it.getDataIndex())]; + if(it.getDataIndex() < maxId) + it.next(); + else { + r++; + break; + } + } + else + c[r] += def; + r++; } - else - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); - // c[off + _colIndexes[j]] += values[offsetToDefault + j]; } - - for(; i < ru; i++, offT++) { - // final double[] c = db.values(offT); - // final int off = db.pos(offT) + offC; - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); - // c[off + _colIndexes[j]] += values[offsetToDefault + j]; + else if(it != null) { + while(it.isNotOver(ru)) { + if(it.value() == r) + c[r] += vals[_data.getIndex(it.getDataIndexAndIncrement())]; + else + c[r] += def; + r++; + } + _indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); + while(r < ru) { + c[r] += def; + r++; + } } @Override - public double getIdx(int r, int colIdx) { - final AIterator it = _indexes.getIterator(r); - final int nCol = _colIndexes.length; - final int rowOff = it.value() == r ? 
getIndex(it.getDataIndex()) * nCol : getNumValues() * nCol - nCol; - return _dict.getValue(rowOff + colIdx); + protected void computeRowSumsSq(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDoubleSq(_colIndexes.length); + computeRowSumsSq(c, rl, ru, vals, _data, _indexes, _numRows); } - @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { - final int numVals = getNumValues(); - // // pre-aggregate nnz per value tuple - double[] vals = _dict.sumAllRowsToDouble(square, _colIndexes.length); - - int rix = rl; - AIterator it = _indexes.getIterator(rl); - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] += vals[numVals - 1]; - else { - c[rix] += vals[_data.getIndex(it.getDataIndexAndIncrement())]; + protected static final void computeRowSumsSq(double[] c, int rl, int ru, double[] vals, AMapToData data, + AOffset indexes, int nRows) { + int r = rl; + final AIterator it = indexes.getIterator(rl); + final double def = vals[vals.length - 1]; + if(it != null && it.value() > ru) + indexes.cacheIterator(it, ru); + else if(it != null && ru >= indexes.getOffsetToLast()) { + final int maxId = data.size() - 1; + while(true) { + if(it.value() == r) { + c[r] += vals[data.getIndex(it.getDataIndex())]; + if(it.getDataIndex() < maxId) + it.next(); + else { + r++; + break; + } + } + else + c[r] += def; + r++; } } - for(; rix < ru; rix++) { - c[rix] += vals[numVals - 1]; + else if(it != null) { + while(r < ru) { + if(it.value() == r) + c[r] += vals[data.getIndex(it.getDataIndexAndIncrement())]; + else + c[r] += def; + r++; + } + indexes.cacheIterator(it, ru); } + while(r < ru) { + c[r] += def; + r++; + } } @Override protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { - final int numVals = getNumValues(); - final double[] vals = _dict.aggregateTuples(builtin, _colIndexes.length); - final AIterator it = _indexes.getIterator(rl); - int rix = rl; + final double[] vals = _dict.aggregateRows(builtin, _colIndexes.length); + computeRowMxx(c, builtin, rl, ru, vals, _data, _indexes, _numRows, vals[vals.length - 1]); + } - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] = builtin.execute(c[rix], vals[numVals - 1]); - else - c[rix] = builtin.execute(c[rix], vals[_data.getIndex(it.getDataIndexAndIncrement())]); + protected static final void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] vals, + AMapToData data, AOffset indexes, int nRows, double def) { + int r = rl; + final AIterator it = indexes.getIterator(rl); + if(it != null && it.value() > ru) + indexes.cacheIterator(it, ru); + else if(it != null && ru >= indexes.getOffsetToLast()) { + final int maxId = data.size() - 1; + while(true) { + if(it.value() == r) { + c[r] = builtin.execute(c[r], vals[data.getIndex(it.getDataIndex())]); + if(it.getDataIndex() < maxId) + it.next(); + else { + r++; + break; + } + } + else + c[r] = builtin.execute(c[r], def); + r++; + } + } + else if(it != null) { + while(r < ru) { + if(it.value() == r) + c[r] = builtin.execute(c[r], vals[data.getIndex(it.getDataIndexAndIncrement())]); + else + c[r] = builtin.execute(c[r], def); + r++; + } + indexes.cacheIterator(it, ru); } - // cover remaining rows with default value - for(; rix < ru; rix++) - c[rix] = builtin.execute(c[rix], vals[numVals - 1]); + while(r < ru) { + c[r] = builtin.execute(c[r], def); + r++; + } } @Override public int[] getCounts(int[] counts) { - final int nonDefaultLength = _data.size(); - // final AIterator it = 
_indexes.getIterator(); - final int defaults = _numRows - nonDefaultLength; - for(int i = 0; i < nonDefaultLength; i++) - counts[_data.getIndex(i)]++; - - counts[counts.length - 1] += defaults; - - return counts; - } - - public int getIndex(int r) { - return _data.getIndex(r); + return _data.getCounts(counts, _numRows); } @Override @@ -274,19 +247,19 @@ public long estimateInMemorySize() { @Override public AColGroup scalarOperation(ScalarOperator op) { - return new ColGroupSDC(_colIndexes, _numRows, applyScalarOp(op), _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, _dict.applyScalarOp(op), _indexes, _data, getCachedCounts()); } @Override public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { ADictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return new ColGroupSDC(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); } @Override public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { ADictionary ret = _dict.binOpRight(op, v, _colIndexes); - return new ColGroupSDC(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); } @Override @@ -311,40 +284,17 @@ public long getExactSizeOnDisk() { return ret; } - public ColGroupSDCZeros extractCommon(double[] constV) { + @Override + public AColGroup extractCommon(double[] constV) { double[] commonV = _dict.getTuple(getNumValues() - 1, _colIndexes.length); if(commonV == null) // The common tuple was all zero. Therefore this column group should never have been SDC. - return new ColGroupSDCZeros(_colIndexes, _numRows, _dict, _indexes, _data, getCounts()); + return ColGroupSDCZeros.create(_colIndexes, _numRows, _dict, _indexes, _data, getCounts()); for(int i = 0; i < _colIndexes.length; i++) constV[_colIndexes[i]] += commonV[i]; ADictionary subtractedDict = _dict.subtractTuple(commonV); - return new ColGroupSDCZeros(_colIndexes, _numRows, subtractedDict, _indexes, _data, getCounts()); - } - - @Override - public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) { - // This method should not be called since if there is a matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); - } - - @Override - public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) { - // This method should not be called since if there is a matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); - } - - @Override - public void tsmmAColGroup(AColGroup other, MatrixBlock result) { - // This method should not be called since if there is a matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); + return ColGroupSDCZeros.create(_colIndexes, _numRows, subtractedDict, _indexes, _data, getCounts()); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java index cb123eca99c..c3f19c5ddad 
100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java @@ -23,16 +23,11 @@ import java.io.DataOutput; import java.io.IOException; -import org.apache.commons.lang.NotImplementedException; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; -import org.apache.sysds.runtime.data.DenseBlock; -import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; -import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.BinaryOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -44,7 +39,7 @@ * This column group is handy in cases where sparse unsafe operations is executed on very sparse columns. Then the zeros * would be materialized in the group without any overhead. */ -public class ColGroupSDCSingle extends AColGroupValue { +public class ColGroupSDCSingle extends AMorphingMMColGroup { private static final long serialVersionUID = 3883228464052204200L; /** * Sparse row indexes for the data @@ -76,126 +71,185 @@ public ColGroupType getColGroupType() { return ColGroupType.SDCSingle; } - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int nCol = _colIndexes.length; - final int offsetToDefault = values.length - nCol; - final AIterator it = _indexes.getIterator(rl); - - int offT = rl + offR; - int i = rl; - for(; i < ru && it.hasNext(); i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - if(it.value() == i) { - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[j]; - it.next(); - } - else - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offsetToDefault + j]; - } - - for(; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offsetToDefault + j]; - } + // @Override + // protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + // double[] values) { + // final int nCol = _colIndexes.length; + // final int offsetToDefault = values.length - nCol; + // final AIterator it = _indexes.getIterator(rl); + + // int offT = rl + offR; + // int i = rl; + // for(; i < ru && it.hasNext(); i++, offT++) { + // final double[] c = db.values(offT); + // final int off = db.pos(offT) + offC; + // if(it.value() == i) { + // for(int j = 0; j < nCol; j++) + // c[off + _colIndexes[j]] += values[j]; + // it.next(); + // } + // else + // for(int j = 0; j < nCol; j++) + // c[off + _colIndexes[j]] += values[offsetToDefault + j]; + // } + + // for(; i < ru; i++, offT++) { + // final double[] c = db.values(offT); + // final int off = db.pos(offT) + offC; + // for(int j = 0; j < nCol; j++) + // c[off + _colIndexes[j]] += values[offsetToDefault + j]; + // } + + // _indexes.cacheIterator(it, ru); + // } + + // @Override + // protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + // SparseBlock values) { + // throw new 
NotImplementedException(); + // } + + // @Override + // protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + // SparseBlock sb) { + // throw new NotImplementedException(); + // } + + // @Override + // protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + // double[] values) { + // final int nCol = _colIndexes.length; + // final int offsetToDefault = values.length - nCol; + // final AIterator it = _indexes.getIterator(rl); + + // int offT = rl + offR; + // int i = rl; + // for(; i < ru && it.hasNext(); i++, offT++) { + // if(it.value() == i) { + // for(int j = 0; j < nCol; j++) + // ret.append(offT, _colIndexes[j] + offC, values[j]); + // it.next(); + // } + // else + // for(int j = 0; j < nCol; j++) + // ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); + // } + + // for(; i < ru; i++, offT++) + // for(int j = 0; j < nCol; j++) + // ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); + + // _indexes.cacheIterator(it, ru); + // } - _indexes.cacheIterator(it, ru); + @Override + public double getIdx(int r, int colIdx) { + AIterator it = _indexes.getIterator(r); + if(it.value() == r) + return _dict.getValue(colIdx); + else + return _dict.getValue(_colIndexes.length + colIdx); } @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock values) { - throw new NotImplementedException(); + protected void computeRowSums(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDouble(_colIndexes.length); + computeRowSums(c, rl, ru, vals); } @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - throw new NotImplementedException(); + protected void computeRowSumsSq(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDoubleSq(_colIndexes.length); + computeRowSums(c, rl, ru, vals); } - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - final int nCol = _colIndexes.length; - final int offsetToDefault = values.length - nCol; + protected void computeRowSums(double[] c, int rl, int ru, double[] vals) { + int r = rl; final AIterator it = _indexes.getIterator(rl); - - int offT = rl + offR; - int i = rl; - for(; i < ru && it.hasNext(); i++, offT++) { - if(it.value() == i) { - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[j]); - it.next(); + final double def = vals[1]; + final double norm = vals[0]; + if(it != null && it.value() > ru) + _indexes.cacheIterator(it, ru); + else if(it != null && ru >= _indexes.getOffsetToLast()) { + final int maxOff = _indexes.getOffsetToLast(); + while(true) { + if(it.value() == r) { + c[r] += norm; + if(it.value() < maxOff) + it.next(); + else { + r++; + break; + } + } + else + c[r] += def; + r++; } - else - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); + } + else if(it != null) { + while(r < ru) { + if(it.value() == r) + c[r] += norm; + else + c[r] += def; + r++; + } + _indexes.cacheIterator(it, ru); } - for(; i < ru; i++, offT++) - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes[j] + offC, values[offsetToDefault + j]); - - _indexes.cacheIterator(it, ru); + while(r < ru) { + c[r] += def; + r++; + } } @Override - public double getIdx(int r, int colIdx) 
{ - AIterator it = _indexes.getIterator(r); - if(it.value() == r) - return _dict.getValue(colIdx); - else - return _dict.getValue(_colIndexes.length + colIdx); + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { + final double[] vals = _dict.aggregateRows(builtin, _colIndexes.length); + computeRowMxx(c, builtin, rl, ru, _indexes, _numRows, vals[1], vals[0]); } - @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { - - // // pre-aggregate nnz per value tuple - final double[] vals = _dict.sumAllRowsToDouble(square, _colIndexes.length); - final AIterator it = _indexes.getIterator(); - - int rix = rl; - it.skipTo(rl); - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] += vals[1]; - else { - c[rix] += vals[0]; - it.next(); + protected static final void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, AOffset indexes, int nRows, + double def, double norm) { + int r = rl; + final AIterator it = indexes.getIterator(rl); + if(it != null && it.value() > ru) + indexes.cacheIterator(it, ru); + else if(it != null && ru >= indexes.getOffsetToLast()) { + final int maxOff = indexes.getOffsetToLast(); + while(true) { + if(it.value() == r) { + c[r] = builtin.execute(c[r], norm); + if(it.value() < maxOff) + it.next(); + else { + r++; + break; + } + } + else + c[r] = builtin.execute(c[r], def); + r++; } } - for(; rix < ru; rix++) { - c[rix] += vals[1]; - } - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { - final double[] vals = _dict.aggregateTuples(builtin, _colIndexes.length); - final AIterator it = _indexes.getIterator(rl); - int rix = rl; - - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] = builtin.execute(c[rix], vals[1]); - else { - c[rix] = builtin.execute(c[rix], vals[0]); - it.next(); + else if(it != null) { + while(r < ru) { + if(it.value() == r) { + c[r] = builtin.execute(c[r], norm); + it.next(); + } + else + c[r] = builtin.execute(c[r], def); + r++; } + indexes.cacheIterator(it, ru); } - // cover remaining rows with default value - for(; rix < ru; rix++) - c[rix] = builtin.execute(c[rix], vals[1]); + while(r < ru) { + c[r] = builtin.execute(c[r], def); + r++; + } } @Override @@ -214,7 +268,7 @@ public long estimateInMemorySize() { @Override public AColGroup scalarOperation(ScalarOperator op) { - return new ColGroupSDCSingle(_colIndexes, _numRows, applyScalarOp(op), _indexes, getCachedCounts()); + return new ColGroupSDCSingle(_colIndexes, _numRows, _dict.applyScalarOp(op), _indexes, getCachedCounts()); } @Override @@ -248,6 +302,7 @@ public long getExactSizeOnDisk() { return ret; } + @Override public ColGroupSDCSingleZeros extractCommon(double[] constV) { double[] commonV = _dict.getTuple(getNumValues() - 1, _colIndexes.length); @@ -261,30 +316,6 @@ public ColGroupSDCSingleZeros extractCommon(double[] constV) { return new ColGroupSDCSingleZeros(_colIndexes, _numRows, subtractedDict, _indexes, getCachedCounts()); } - @Override - public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) { - // This method should not be called since if there is a matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); - } - - @Override - public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) { - // This method should not be called since if there is a 
matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); - } - - @Override - public void tsmmAColGroup(AColGroup other, MatrixBlock result) { - // This method should not be called since if there is a matrix multiplication - // the default value is transformed to be zero, and this column group would be allocated as a - // SDC Zeros version - throw new DMLCompressionException("This method should never be called"); - } - @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java index d8edd0d3c7c..534856735b4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java @@ -46,9 +46,7 @@ */ public class ColGroupSDCSingleZeros extends APreAgg { private static final long serialVersionUID = 8033235615964315078L; - /** - * Sparse row indexes for the data - */ + /** Sparse row indexes for the data */ protected transient AOffset _indexes; /** @@ -80,64 +78,129 @@ public ColGroupType getColGroupType() { @Override protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { - final int nCol = _colIndexes.length; + final AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int row = offR + it.value(); - final double[] c = db.values(row); - final int off = db.pos(row) + offC; - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[j]; - - it.next(); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int maxOff = _indexes.getOffsetToLast(); + final int nCol = _colIndexes.length; + while(true) { + final int row = offR + it.value(); + final double[] c = db.values(row); + final int off = db.pos(row); + for(int j = 0; j < nCol; j++) + c[off + _colIndexes[j] + offC] += values[j]; + if(it.value() < maxOff) + it.next(); + else + break; + } } - _indexes.cacheIterator(it, ru); + else { + final int nCol = _colIndexes.length; + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + final double[] c = db.values(row); + final int off = db.pos(row); + for(int j = 0; j < nCol; j++) + c[off + _colIndexes[j] + offC] += values[j]; + + it.next(); + } + _indexes.cacheIterator(it, ru); + } + } @Override protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock values) { - throw new NotImplementedException(); - // final int offTCorr = offT - rl; - // final DenseBlock db = target.getDenseBlock(); - // final int apos = values.pos(0); - // final int alen = values.size(0) + apos; - // final int[] aix = values.indexes(0); - // final double[] avals = values.values(0); - - // AIterator it = _indexes.getIterator(rl); - // while(it.hasNext() && it.value() < ru) { - // final int idx = offTCorr + it.value(); - // final double[] c = db.values(idx); - // final int off = db.pos(idx); - - // for(int j = apos; j < alen; j++) - // c[off + _colIndexes[aix[j]]] += avals[j]; - - // it.next(); - // } - - // _indexes.cacheIterator(it, ru); + SparseBlock sb) { + final AIterator it = 
_indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + throw new NotImplementedException(); + } + else { + final int apos = sb.pos(0); + final int alen = sb.size(0) + apos; + final int[] aix = sb.indexes(0); + final double[] avals = sb.values(0); + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + final double[] c = db.values(row); + final int off = db.pos(row); + for(int j = apos; j < alen; j++) + c[off + _colIndexes[aix[j]] + offC] += avals[j]; + it.next(); + } + _indexes.cacheIterator(it, ru); + } } @Override protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { - throw new NotImplementedException(); + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + throw new NotImplementedException(); + } + else { + final int apos = sb.pos(0); + final int alen = sb.size(0) + apos; + final int[] aix = sb.indexes(0); + final double[] avals = sb.values(0); + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + for(int j = apos; j < alen; j++) + ret.append(row, _colIndexes[aix[j]] + offC, avals[j]); + + it.next(); + } + _indexes.cacheIterator(it, ru); + } } @Override protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { - final int nCol = _colIndexes.length; final AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int row = offR + it.value(); - for(int j = 0; j < nCol; j++) - ret.append(row, _colIndexes[j] + offC, values[j]); - it.next(); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int nCol = _colIndexes.length; + final int lastOff = _indexes.getOffsetToLast(); + while(true) { + final int row = offR + it.value(); + for(int j = 0; j < nCol; j++) + ret.append(row, _colIndexes[j] + offC, values[j]); + if(it.value() == lastOff) + return; + it.next(); + } + } + else { + final int nCol = _colIndexes.length; + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + for(int j = 0; j < nCol; j++) + ret.append(row, _colIndexes[j] + offC, values[j]); + + it.next(); + } + _indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); } @Override @@ -150,34 +213,45 @@ public double getIdx(int r, int colIdx) { } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { - final double vals = _dict.sumAllRowsToDouble(square, _colIndexes.length)[0]; - final AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - c[it.value()] += vals; - it.next(); - } - + protected void computeRowSums(double[] c, int rl, int ru) { + final double def = _dict.sumAllRowsToDouble(_colIndexes.length)[0]; + computeRowSum(c, rl, ru, def); } @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { - final double vals = _dict.aggregateTuples(builtin, _colIndexes.length)[0]; - final AIterator it = _indexes.getIterator(rl); - int rix = rl; + protected void computeRowSumsSq(double[] c, int rl, int ru) { + final double def = _dict.sumAllRowsToDoubleSq(_colIndexes.length)[0]; + computeRowSum(c, rl, ru, def); + } - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] = 
builtin.execute(c[rix], 0); - else { - c[rix] = builtin.execute(c[rix], vals); + protected void computeRowSum(double[] c, int rl, int ru, double def) { + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() > ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int maxOff = _indexes.getOffsetToLast(); + while(true) { + c[it.value()] += def; + if(it.value() == maxOff) + break; + it.next(); + } + } + else { + while(it.isNotOver(ru)) { + c[it.value()] += def; it.next(); } + _indexes.cacheIterator(it, ru); } + } - // cover remaining rows - for(; rix < ru; rix++) - c[rix] = builtin.execute(c[rix], 0); + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { + final double[] vals = _dict.aggregateRows(builtin, _colIndexes.length); + ColGroupSDCSingle.computeRowMxx(c, builtin, rl, ru, _indexes, _numRows, 0, vals[0]); } @Override @@ -197,66 +271,88 @@ public void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, int ru) { @Override public void preAggregateDense(MatrixBlock m, MatrixBlock preAgg, int rl, int ru, int cl, int cu) { - final double[] mV = m.getDenseBlockValues(); - final double[] preAV = preAgg.getDenseBlockValues(); - final int numVals = getNumValues(); - final int blockSize = 2000; - for(int block = cl; block < cu; block += blockSize) { - final int blockEnd = Math.min(block + blockSize, cu); - final AIterator itStart = _indexes.getIterator(block); - AIterator it; - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += numVals) { - final int offLeft = rowLeft * _numRows; - it = itStart.clone(); - while(it.value() < blockEnd && it.hasNext()) { - final int i = it.value(); - preAV[offOut] += mV[offLeft + i]; - it.next(); - } + + final AIterator it = _indexes.getIterator(cl); + if(it == null) + return; + else if(it.value() > cu) + _indexes.cacheIterator(it, cu); + else if(rl == ru - 1) { + final int maxOff = _indexes.getOffsetToLast(); + final double[] mV = m.getDenseBlockValues(); + final double[] preAV = preAgg.getDenseBlockValues(); + final int offLeft = rl * _numRows; + while(true) { + final int i = it.value(); + preAV[0] += mV[offLeft + i]; + if(i == maxOff) + break; + it.next(); } } + else + throw new NotImplementedException(); + } private void preAggregateDense(MatrixBlock m, MatrixBlock preAgg, int rl, int ru) { - final double[] preAV = preAgg.getDenseBlockValues(); - final double[] mV = m.getDenseBlockValues(); - final int numVals = getNumValues(); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += numVals) { - final AIterator it = _indexes.getIterator(); - final int offLeft = rowLeft * _numRows; - while(it.hasNext()) { - final int i = it.value(); - preAV[offOut] += mV[offLeft + i]; + final AIterator it = _indexes.getIterator(); + if(rl == ru - 1) { + double ret = 0; + final DenseBlock db = m.getDenseBlock(); + final double[] mV = db.values(rl); + final int off = db.pos(rl); + final int offsetToLast = _indexes.getOffsetToLast(); + while(true) { + ret += mV[off + it.value()]; + if(it.value() == offsetToLast) + break; it.next(); } + + preAgg.setValue(0, 0, ret); } + else + throw new NotImplementedException(); } private void preAggregateSparse(SparseBlock sb, MatrixBlock preAgg, int rl, int ru) { - final double[] preAV = preAgg.getDenseBlockValues(); - final int numVals = getNumValues(); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += numVals) { - if(sb.isEmpty(rowLeft)) - continue; - final AIterator it = 
_indexes.getIterator(); - final int apos = sb.pos(rowLeft); - final int alen = sb.size(rowLeft) + apos; - final int[] aix = sb.indexes(rowLeft); - final double[] avals = sb.values(rowLeft); + final AIterator it = _indexes.getIterator(); + if(rl == ru - 1) { + final int apos = sb.pos(rl); + final int alen = sb.size(rl) + apos; + final int[] aix = sb.indexes(rl); + final double[] avals = sb.values(rl); + final int offsetToLast = _indexes.getOffsetToLast(); + + double ret = 0; int j = apos; - while(it.hasNext() && j < alen) { - final int index = aix[j]; - final int v = it.value(); - if(index < v) - j++; - else if(index == v) { - preAV[offOut] += avals[j++]; + + while(true) { + final int idx = aix[j]; + + if(idx == it.value()) { + ret += avals[j++]; + if(j >= alen || it.value() >= offsetToLast) + break; it.next(); } - else + else if(idx < it.value()) { + j++; + if(j >= alen) + break; + } + else { + if(it.value() >= offsetToLast) + break; it.next(); + } } + + preAgg.setValue(0, 0, ret); } + else + throw new NotImplementedException(); } @Override @@ -271,9 +367,9 @@ public AColGroup scalarOperation(ScalarOperator op) { double val0 = op.executeScalar(0); boolean isSparseSafeOp = op.sparseSafe || val0 == 0; if(isSparseSafeOp) - return new ColGroupSDCSingleZeros(_colIndexes, _numRows, applyScalarOp(op), _indexes, getCachedCounts()); + return new ColGroupSDCSingleZeros(_colIndexes, _numRows, _dict.applyScalarOp(op), _indexes, getCachedCounts()); else { - ADictionary aDictionary = applyScalarOp(op, val0, getNumCols());// swapEntries(); + ADictionary aDictionary = _dict.applyScalarOp(op, val0, getNumCols());// swapEntries(); // ADictionary aDictionary = applyScalarOp(op, val0, getNumCols()); return new ColGroupSDCSingle(_colIndexes, _numRows, aDictionary, _indexes, null); } @@ -336,10 +432,15 @@ public boolean sameIndexStructure(AColGroupCompressed that) { public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { final AIterator itThis = _indexes.getIterator(); final int nCol = that._colIndexes.length; - while(itThis.hasNext()) { + final int finalOffThis = _indexes.getOffsetToLast(); + + while(true) { final int fr = that._data.getIndex(itThis.value()); that._dict.addToEntry(ret, fr, 0, nCol); - itThis.next(); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); } } @@ -348,26 +449,69 @@ public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary final AIterator itThat = that._indexes.getIterator(); final AIterator itThis = _indexes.getIterator(); final int nCol = that._colIndexes.length; - - while(itThat.hasNext() && itThis.hasNext()) { - final int v = itThat.value(); - if(v == itThis.skipTo(v)) - that._dict.addToEntry(ret, that.getIndex(itThat.getDataIndex()), 0, nCol); - - itThat.next(); + final int finalOffThis = _indexes.getOffsetToLast(); + final int finalOffThat = that._indexes.getOffsetToLast(); + + while(true) { + if(itThat.value() == itThis.value()) { + that._dict.addToEntry(ret, that._data.getIndex(itThat.getDataIndex()), 0, nCol); + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } + else if(itThat.value() < itThis.value()) { + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + } + else { + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } } } @Override public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final int nCol = that._colIndexes.length; final 
AIterator itThat = that._indexes.getIterator(); final AIterator itThis = _indexes.getIterator(); - final int nCol = that._colIndexes.length; - while(itThat.hasNext()) { - final int v = itThat.value(); - if(v == itThis.skipTo(v)) + final int finalOffThis = _indexes.getOffsetToLast(); + final int finalOffThat = that._indexes.getOffsetToLast(); + + while(true) { + if(itThat.value() == itThis.value()) { that._dict.addToEntry(ret, 0, 0, nCol); - itThat.next(); + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } + else if(itThat.value() < itThis.value()) { + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + } + else { + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java index a7632dd70ad..8fa9887b2f5 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java @@ -23,13 +23,9 @@ import java.io.DataOutput; import java.io.IOException; -import org.apache.commons.lang.NotImplementedException; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToChar; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; @@ -37,6 +33,7 @@ import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.BinaryOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -72,19 +69,20 @@ protected ColGroupSDCZeros(int numRows) { super(numRows); } - protected ColGroupSDCZeros(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data) { - super(colIndices, numRows, dict, null); + private ColGroupSDCZeros(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + int[] cachedCounts) { + super(colIndices, numRows, dict, cachedCounts); _indexes = offsets; _data = data; _zeros = true; } - protected ColGroupSDCZeros(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + protected static AColGroup create(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, int[] cachedCounts) { - super(colIndices, numRows, dict, cachedCounts); - _indexes = offsets; - _data = data; - _zeros = true; + if(dict == null) + return new ColGroupEmpty(colIndices); + else + return new ColGroupSDCZeros(colIndices, numRows, dict, offsets, data, cachedCounts); } @Override @@ -100,129 +98,256 @@ public ColGroupType getColGroupType() { @Override protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] 
values) { - final int nCol = _colIndexes.length; - AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int idx = offR + it.value(); - final double[] c = db.values(idx); - final int off = db.pos(idx) + offC; - final int offDict = getIndex(it.getDataIndexAndIncrement()) * nCol; - for(int j = 0; j < nCol; j++) - c[off + _colIndexes[j]] += values[offDict + j]; + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int lastOff = _indexes.getOffsetToLast(); + final int nCol = _colIndexes.length; + while(true) { + final int idx = offR + it.value(); + final double[] c = db.values(idx); + final int off = db.pos(idx) + offC; + final int offDict = _data.getIndex(it.getDataIndex()) * nCol; + for(int j = 0; j < nCol; j++) + c[off + _colIndexes[j]] += values[offDict + j]; + if(it.value() == lastOff) + return; + it.next(); + } + } + else { + + final int nCol = _colIndexes.length; + while(it.isNotOver(ru)) { + final int idx = offR + it.value(); + final double[] c = db.values(idx); + final int off = db.pos(idx) + offC; + final int offDict = _data.getIndex(it.getDataIndex()) * nCol; + for(int j = 0; j < nCol; j++) + c[off + _colIndexes[j]] += values[offDict + j]; + it.next(); + } + _indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); + } @Override protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { - AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int idx = offR + it.value(); - final int dictIndex = getIndex(it.getDataIndexAndIncrement()); - if(sb.isEmpty(dictIndex)) - continue; - - final double[] c = db.values(idx); - final int off = db.pos(idx) + offC; - final int apos = sb.pos(dictIndex); - final int alen = sb.size(dictIndex) + apos; - final double[] avals = sb.values(dictIndex); - final int[] aix = sb.indexes(dictIndex); - for(int j = apos; j < alen; j++) - c[off + _colIndexes[aix[j]]] += avals[j]; + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int lastOff = _indexes.getOffsetToLast(); + while(true) { + final int idx = offR + it.value(); + final double[] c = db.values(idx); + final int dx = it.getDataIndex(); + final int dictIndex = _data.getIndex(dx); + if(sb.isEmpty(dictIndex)) { + if(it.value() == lastOff) + return; + it.next(); + continue; + } + + final int off = db.pos(idx) + offC; + final int apos = sb.pos(dictIndex); + final int alen = sb.size(dictIndex) + apos; + final double[] avals = sb.values(dictIndex); + final int[] aix = sb.indexes(dictIndex); + for(int j = apos; j < alen; j++) + c[off + _colIndexes[aix[j]]] += avals[j]; + if(it.value() == lastOff) + return; + it.next(); + } + } + else { + while(it.isNotOver(ru)) { + final int idx = offR + it.value(); + final int dx = it.getDataIndex(); + final int dictIndex = _data.getIndex(dx); + if(sb.isEmpty(dictIndex)) { + it.next(); + continue; + } + + final double[] c = db.values(idx); + final int off = db.pos(idx) + offC; + final int apos = sb.pos(dictIndex); + final int alen = sb.size(dictIndex) + apos; + final double[] avals = sb.values(dictIndex); + final int[] aix = sb.indexes(dictIndex); + for(int j = apos; j < alen; j++) + c[off + _colIndexes[aix[j]]] += avals[j]; + + it.next(); + } + 
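/*
 * The decompressTo* variants in this class all share one traversal shape; a minimal sketch of it,
 * using only the AOffset/AIterator calls that appear in this patch (getIterator, value, next,
 * isNotOver, getOffsetToLast, cacheIterator) and a placeholder processRow for the per-variant body:
 *
 *   final AIterator it = _indexes.getIterator(rl);
 *   if(it == null)                          // no offsets at or after rl
 *     return;
 *   else if(it.value() >= ru)               // first offset is past the range; cache for next block
 *     _indexes.cacheIterator(it, ru);
 *   else if(ru >= _indexes.getOffsetToLast()) {
 *     final int last = _indexes.getOffsetToLast();
 *     while(true) {                         // range contains the last offset: it doubles as the
 *       processRow(it);                     // loop sentinel, removing per-iteration bounds checks
 *       if(it.value() == last)
 *         return;
 *       it.next();
 *     }
 *   }
 *   else {
 *     while(it.isNotOver(ru)) {             // generic middle-of-matrix case: bounded scan,
 *       processRow(it);                     // then cache the iterator position for the next range
 *       it.next();
 *     }
 *     _indexes.cacheIterator(it, ru);
 *   }
 */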
_indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); } @Override protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { - AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int row = offR + it.value(); - final int dictIndex = getIndex(it.getDataIndexAndIncrement()); - if(sb.isEmpty(dictIndex)) - continue; - - final int apos = sb.pos(dictIndex); - final int alen = sb.size(dictIndex) + apos; - final double[] avals = sb.values(dictIndex); - final int[] aix = sb.indexes(dictIndex); - for(int j = apos; j < alen; j++) - ret.append(row, _colIndexes[aix[j]] + offC, avals[j] ); + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int lastOff = _indexes.getOffsetToLast(); + while(true) { + final int row = offR + it.value(); + final int dx = it.getDataIndex(); + final int dictIndex = _data.getIndex(dx); + if(sb.isEmpty(dictIndex)) { + if(it.value() == lastOff) + return; + it.next(); + continue; + } + + final int apos = sb.pos(dictIndex); + final int alen = sb.size(dictIndex) + apos; + final double[] avals = sb.values(dictIndex); + final int[] aix = sb.indexes(dictIndex); + for(int j = apos; j < alen; j++) + ret.append(row, _colIndexes[aix[j]] + offC, avals[j]); + if(it.value() == lastOff) + return; + it.next(); + } + } + else { + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + final int dx = it.getDataIndex(); + final int dictIndex = _data.getIndex(dx); + if(sb.isEmpty(dictIndex)) { + it.next(); + continue; + } + + final int apos = sb.pos(dictIndex); + final int alen = sb.size(dictIndex) + apos; + final double[] avals = sb.values(dictIndex); + final int[] aix = sb.indexes(dictIndex); + for(int j = apos; j < alen; j++) + ret.append(row, _colIndexes[aix[j]] + offC, avals[j]); + it.next(); + } + _indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); } @Override protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { - final int nCol = _colIndexes.length; + // LOG.error(ret); + final AIterator it = _indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() >= ru) + _indexes.cacheIterator(it, ru); + else if(ru >= _indexes.getOffsetToLast()) { + final int lastOff = _indexes.getOffsetToLast(); + final int nCol = _colIndexes.length; + while(true) { + final int row = offR + it.value(); + final int dx = it.getDataIndex(); + final int offDict = _data.getIndex(dx) * nCol; + for(int j = 0; j < nCol; j++) + ret.append(row, _colIndexes[j] + offC, values[offDict + j]); + if(it.value() == lastOff) + return; + it.next(); + } + } + else { + + final int nCol = _colIndexes.length; + while(it.isNotOver(ru)) { + final int row = offR + it.value(); + final int dx = it.getDataIndex(); + final int offDict = _data.getIndex(dx) * nCol; + for(int j = 0; j < nCol; j++) + ret.append(row, _colIndexes[j] + offC, values[offDict + j]); - AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) { - final int row = offR + it.value(); - final int offDict = getIndex(it.getDataIndexAndIncrement()) * nCol; - for(int j = 0; j < nCol; j++) - ret.append(row, _colIndexes[j] + offC, values[offDict + j]); + it.next(); + } + _indexes.cacheIterator(it, ru); } - _indexes.cacheIterator(it, ru); + } @Override public double getIdx(int r, int colIdx) { 
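/*
 * Lookup semantics: SDCZeros stores offsets only for non-default rows, and its default is zero.
 * So if no iterator exists at r, or the nearest stored offset is not exactly r, the cell was never
 * materialized in the dictionary and the sparse-safe default 0 is returned.
 */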
final AIterator it = _indexes.getIterator(r); + if(it == null || it.value() != r) + return 0; final int nCol = _colIndexes.length; - if(it.value() == r) - return _dict.getValue(getIndex(it.getDataIndex()) * nCol + colIdx); - else - return 0.0; + return _dict.getValue(_data.getIndex(it.getDataIndex()) * nCol + colIdx); } @Override - protected void computeRowSums(double[] c, boolean square, int rl, int ru) { - final double[] vals = _dict.sumAllRowsToDouble(square, _colIndexes.length); - final AIterator it = _indexes.getIterator(rl); - while(it.hasNext() && it.value() < ru) - c[it.value()] += vals[getIndex(it.getDataIndexAndIncrement())]; + protected void computeRowSums(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDouble(_colIndexes.length); + computeRowSums(c, rl, ru, vals); } @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { - final double[] vals = _dict.aggregateTuples(builtin, _colIndexes.length); - final AIterator it = _indexes.getIterator(rl); - int rix = rl; - - for(; rix < ru && it.hasNext(); rix++) { - if(it.value() != rix) - c[rix] = builtin.execute(c[rix], 0); - else - c[rix] = builtin.execute(c[rix], vals[_data.getIndex(it.getDataIndexAndIncrement())]); + protected void computeRowSumsSq(double[] c, int rl, int ru) { + final double[] vals = _dict.sumAllRowsToDoubleSq(_colIndexes.length); + computeRowSums(c, rl, ru, vals); + } + + protected void computeRowSums(double[] c, int rl, int ru, double[] vals) { + computeRowSums(c, rl, ru, vals, _data, _indexes, _numRows); + } + + protected static final void computeRowSums(double[] c, int rl, int ru, double[] vals, AMapToData data, + AOffset indexes, int nRows) { + final AIterator it = indexes.getIterator(rl); + if(it == null) + return; + else if(it.value() > ru) + indexes.cacheIterator(it, ru); + else if(ru >= indexes.getOffsetToLast()) { + final int maxId = data.size() - 1; + c[it.value()] += vals[data.getIndex(it.getDataIndex())]; + while(it.getDataIndex() < maxId) { + it.next(); + c[it.value()] += vals[data.getIndex(it.getDataIndex())]; + } + } + else { + while(it.isNotOver(ru)) { + c[it.value()] += vals[data.getIndex(it.getDataIndex())]; + it.next(); + } + indexes.cacheIterator(it, ru); } - - // cover remaining rows with default value - for(; rix < ru; rix++) - c[rix] = builtin.execute(c[rix], 0); } @Override - public int[] getCounts(int[] counts) { - final int nonDefaultLength = _data.size(); - // final AIterator it = _indexes.getIterator(); - final int zeros = _numRows - nonDefaultLength; - for(int i = 0; i < nonDefaultLength; i++) - counts[_data.getIndex(i)]++; - - counts[counts.length - 1] += zeros; - - return counts; + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru) { + final double[] vals = _dict.aggregateRows(builtin, _colIndexes.length); + ColGroupSDC.computeRowMxx(c, builtin, rl, ru, vals, _data, _indexes, _numRows, 0); } - public int getIndex(int r) { - return _data.getIndex(r); + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts, _numRows); } @Override @@ -235,82 +360,11 @@ public void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, int ru) { @Override public void preAggregateDense(MatrixBlock m, MatrixBlock preAgg, int rl, int ru, int cl, int cu) { - - final int numVals = getNumValues(); - if(cl != 0 && cu != preAgg.getNumColumns()) - throw new NotImplementedException("Not implemented preAggregate of sub number of columns"); - if(_data instanceof MapToByte) - preAggregateDenseByte(m, preAgg, 
((MapToByte) _data).getBytes(), rl, ru, cl, cu, _numRows, numVals, _indexes); - else if(_data instanceof MapToChar) - preAggregateDenseChar(m, preAgg, ((MapToChar) _data).getChars(), rl, ru, cl, cu, _numRows, numVals, _indexes); - else - throw new DMLCompressionException("Unsupported map type:" + _data); - - } - - private static void preAggregateDenseByte(final MatrixBlock m, final MatrixBlock preAgg, final byte[] d, - final int rl, final int ru, final int cl, final int cu, final int nRow, final int nVal, AOffset indexes) { - final double[] preAV = preAgg.getDenseBlockValues(); - final double[] mV = m.getDenseBlockValues(); - // multi row iterator. - final AIterator itStart = indexes.getIterator(cl); - AIterator it = null; - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) { - final int offLeft = rowLeft * nRow; - it = itStart.clone(); - while(it.value() < cu && it.hasNext()) { - int i = it.value(); - int index = d[it.getDataIndexAndIncrement()] & 0xFF; - preAV[offOut + index] += mV[offLeft + i]; - } - } - if(it != null && cu < m.getNumColumns()) - indexes.cacheIterator(it, cu); - } - - private static void preAggregateDenseChar(final MatrixBlock m, final MatrixBlock preAgg, final char[] d, - final int rl, final int ru, final int cl, final int cu, final int nRow, final int nVal, AOffset indexes) { - final double[] preAV = preAgg.getDenseBlockValues(); - final double[] mV = m.getDenseBlockValues(); - // multi row iterator. - final AIterator itStart = indexes.getIterator(cl); - AIterator it = null; - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) { - final int offLeft = rowLeft * nRow; - it = itStart.clone(); - while(it.value() < cu && it.hasNext()) { - int i = it.value(); - int index = d[it.getDataIndexAndIncrement()]; - preAV[offOut + index] += mV[offLeft + i]; - } - } - if(it != null && cu < m.getNumColumns()) - indexes.cacheIterator(it, cu); + _data.preAggregateDense(m, preAgg.getDenseBlockValues(), rl, ru, cl, cu, _indexes); } private void preAggregateSparse(SparseBlock sb, MatrixBlock preAgg, int rl, int ru) { - final double[] preAV = preAgg.getDenseBlockValues(); - final int numVals = getNumValues(); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += numVals) { - if(sb.isEmpty(rowLeft)) - continue; - final AIterator it = _indexes.getIterator(); - final int apos = sb.pos(rowLeft); - final int alen = sb.size(rowLeft) + apos; - final int[] aix = sb.indexes(rowLeft); - final double[] avals = sb.values(rowLeft); - int j = apos; - while(it.hasNext() && j < alen) { - final int index = aix[j]; - final int val = it.value(); - if(index < val) - j++; - else if(index == val) - preAV[offOut + _data.getIndex(it.getDataIndexAndIncrement())] += avals[j++]; - else - it.next(); - } - } + _data.preAggregateSparse(sb, preAgg.getDenseBlockValues(), rl, ru, _indexes); } @Override @@ -326,10 +380,10 @@ public AColGroup scalarOperation(ScalarOperator op) { double val0 = op.executeScalar(0); boolean isSparseSafeOp = op.sparseSafe || val0 == 0; if(isSparseSafeOp) - return new ColGroupSDCZeros(_colIndexes, _numRows, applyScalarOp(op), _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, _dict.applyScalarOp(op), _indexes, _data, getCachedCounts()); else { - ADictionary rValues = applyScalarOp(op, val0, getNumCols()); - return new ColGroupSDC(_colIndexes, _numRows, rValues, _indexes, _data, getCachedCounts()); + ADictionary rValues = _dict.applyScalarOp(op, val0, getNumCols()); + return ColGroupSDC.create(_colIndexes, 
_numRows, rValues, _indexes, _data, getCachedCounts()); } } @@ -337,11 +391,15 @@ public AColGroup scalarOperation(ScalarOperator op) { public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { ADictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return new ColGroupSDCZeros(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + } + else if(op.fn instanceof Plus) { + double[] def = ColGroupUtils.binaryDefRowLeft(op, v, _colIndexes); + return ColGroupPFOR.create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), def); } else { ADictionary ret = _dict.applyBinaryRowOpLeftAppendNewEntry(op, v, _colIndexes); - return new ColGroupSDC(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return ColGroupSDC.create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); } } @@ -349,11 +407,15 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { ADictionary ret = _dict.binOpRight(op, v, _colIndexes); - return new ColGroupSDCZeros(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + } + else if(op.fn instanceof Plus) { + double[] def = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupPFOR.create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), def); } else { ADictionary ret = _dict.applyBinaryRowOpRightAppendNewEntry(op, v, _colIndexes); - return new ColGroupSDC(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); + return ColGroupSDC.create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); } } @@ -394,10 +456,15 @@ public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { final AIterator itThis = _indexes.getIterator(); final int nCol = that._colIndexes.length; - while(itThis.hasNext()) { + final int finalOffThis = _indexes.getOffsetToLast(); + while(true) { final int fr = that._data.getIndex(itThis.value()); - final int to = getIndex(itThis.getDataIndexAndIncrement()); + final int to = _data.getIndex(itThis.getDataIndex()); that._dict.addToEntry(ret, fr, to, nCol); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); } } @@ -405,17 +472,37 @@ public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { final AIterator itThat = that._indexes.getIterator(); final AIterator itThis = _indexes.getIterator(); + + final int finalOffThis = _indexes.getOffsetToLast(); + final int finalOffThat = that._indexes.getOffsetToLast(); + final int nCol = that._colIndexes.length; - while(itThat.hasNext() && itThis.hasNext()) { + while(true) { if(itThat.value() == itThis.value()) { - final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); - final int to = getIndex(itThis.getDataIndexAndIncrement()); + final int fr = that._data.getIndex(itThat.getDataIndex()); + final int to = _data.getIndex(itThis.getDataIndex()); that._dict.addToEntry(ret, fr, to, nCol); + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } + else if(itThat.value() < itThis.value()) { + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + 
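/*
 * This co-iteration is a plain merge of two sorted offset lists; a sketch in array terms, where
 * a/b stand for the offsets of `that`/`this` and add() for the dictionary aggregation:
 *
 *   int i = 0, j = 0;
 *   while(i < a.length && j < b.length) {
 *     if(a[i] == b[j]) { add(i, j); i++; j++; }  // row present in both groups
 *     else if(a[i] < b[j]) i++;                  // advance whichever list is behind
 *     else j++;
 *   }
 *
 * The patch expresses the bounds checks as comparisons against getOffsetToLast() instead of
 * hasNext(), which is why each branch tests the final offset before calling next().
 */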
} + else { + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); } - else if(itThat.value() < itThis.value()) - itThat.next(); - else - itThis.next(); } } @@ -425,16 +512,34 @@ public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, final AIterator itThis = _indexes.getIterator(); final int nCol = that._colIndexes.length; - while(itThat.hasNext() && itThis.hasNext()) { + final int finalOffThis = _indexes.getOffsetToLast(); + final int finalOffThat = that._indexes.getOffsetToLast(); + + while(true) { if(itThat.value() == itThis.value()) { - final int to = getIndex(itThis.getDataIndexAndIncrement()); + final int to = _data.getIndex(itThis.getDataIndex()); that._dict.addToEntry(ret, 0, to, nCol); - itThat.next(); + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); + } + else if(itThat.value() < itThis.value()) { + if(itThat.value() >= finalOffThat) + break; + else + itThat.next(); + } + else { + if(itThis.value() >= finalOffThis) + break; + else + itThis.next(); } - else if(itThat.value() < itThis.value()) - itThat.next(); - else - itThis.next(); } } @@ -448,7 +553,7 @@ public AColGroup replace(double pattern, double replace) { private AColGroup replaceZero(double replace) { ADictionary replaced = _dict.replaceZeroAndExtend(replace, _colIndexes.length); - return new ColGroupSDC(_colIndexes, _numRows, replaced, _indexes, _data, getCachedCounts()); + return ColGroupSDC.create(_colIndexes, _numRows, replaced, _indexes, _data, getCachedCounts()); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUtils.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUtils.java new file mode 100644 index 00000000000..f33d2dee293 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUtils.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.colgroup; + +import org.apache.sysds.runtime.functionobjects.ValueFunction; +import org.apache.sysds.runtime.matrix.operators.BinaryOperator; + +public class ColGroupUtils { + + /** + * Calculate the result of performing the binary operation on an empty row to the left + * + * v op empty + * + * @param op The operator + * @param v The values to use on the left side of the operator + * @param colIndexes The column indexes to extract + * @return The result as a double array. 
+ */
+ protected final static double[] binaryDefRowLeft(BinaryOperator op, double[] v, int[] colIndexes) {
+ final ValueFunction fn = op.fn;
+ final int len = colIndexes.length;
+ final double[] ret = new double[len];
+ for(int i = 0; i < len; i++)
+ ret[i] = fn.execute(v[colIndexes[i]], 0);
+ return ret;
+ }
+
+ /**
+ * Calculate the result of performing the binary operation on an empty row to the right
+ *
+ * empty op v
+ *
+ * @param op The operator
+ * @param v The values to use on the right side of the operator
+ * @param colIndexes The column indexes to extract
+ * @return The result as a double array.
+ */
+ protected final static double[] binaryDefRowRight(BinaryOperator op, double[] v, int[] colIndexes) {
+ final ValueFunction fn = op.fn;
+ final int len = colIndexes.length;
+ final double[] ret = new double[len];
+ for(int i = 0; i < len; i++)
+ ret[i] = fn.execute(0, v[colIndexes[i]]);
+ return ret;
+ }
+
+}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
index 79be408c17f..7ee7ed38d8a 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
@@ -70,6 +70,16 @@ public abstract class ADictionary implements Serializable {
 */
 public abstract double aggregate(double init, Builtin fn);
+ /**
+ * Aggregate all the contained values, with a reference offset.
+ *
+ * @param init The initial value, in cases such as Max value this could be -infinity.
+ * @param fn The function to apply to the values
+ * @param reference The reference offset to each value in the dictionary
+ * @return The aggregated value as a double.
+ */
+ public abstract double aggregate(double init, Builtin fn, double[] reference);
+
 /**
 * Aggregate all entries in the rows.
 *
@@ -77,7 +87,57 @@ public abstract class ADictionary implements Serializable {
 * @param nCol The number of columns contained in the dictionary.
 * @return Aggregates for this dictionary tuples.
 */
- public abstract double[] aggregateTuples(Builtin fn, int nCol);
+ public abstract double[] aggregateRows(Builtin fn, int nCol);
+
+ /**
+ * Aggregate all entries in the rows with an offset value reference added.
+ *
+ * @param fn The aggregate function
+ * @param reference The reference offset to each value in the dictionary
+ * @return Aggregates for this dictionary tuples.
+ */
+ public abstract double[] aggregateRows(Builtin fn, double[] reference);
+
+ /**
+ * Aggregates the columns into the target double array provided.
+ *
+ * @param c The target double array, this contains the full number of columns, therefore the colIndexes for
+ * this specific dictionary is needed.
+ * @param fn The function to apply to individual columns
+ * @param colIndexes The mapping to the target columns from the individual columns
+ */
+ public abstract void aggregateCols(double[] c, Builtin fn, int[] colIndexes);
+
+ /**
+ * Aggregates the columns into the target double array provided.
+ *
+ * @param c The target double array, this contains the full number of columns, therefore the colIndexes for
+ * this specific dictionary is needed.
+ * @param fn The function to apply to individual columns
+ * @param reference The reference offset values to add to each cell.
+ * @param colIndexes The mapping to the target columns from the individual columns
+ */
+ public abstract void aggregateCols(double[] c, Builtin fn, int[] colIndexes, double[] reference);
+
+ /**
+ * Allocate a new dictionary and apply the scalar operation to each cell, then return the new dictionary.
+ *
+ * @param op The operator.
+ * @return The new dictionary to return.
+ */
+ public abstract ADictionary applyScalarOp(ScalarOperator op);
+
+ /**
+ * Allocate a new dictionary and apply the scalar operation to each cell, then return the new dictionary.
+ *
+ * outValues[j] = op(this.values[j] + reference[i]) - newReference[i]
+ *
+ * @param op The operator to apply to each cell.
+ * @param reference The reference value to add before the operator.
+ * @param newReference The reference value to subtract after the operator.
+ * @return A new dictionary.
+ */
+ public abstract ADictionary applyScalarOp(ScalarOperator op, double[] reference, double[] newReference);
 /**
 * Applies the scalar operation on the dictionary. Note that this operation modifies the underlying data, and
@@ -109,6 +169,23 @@ public abstract class ADictionary implements Serializable {
 */
 public abstract ADictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes);
+ /**
+ * Apply the binary operator such that each value is offset by the reference before application. Then put the result
+ * into the new dictionary, but offset it by the new reference.
+ *
+ * outValues[j] = op(v[colIndexes[i]], this.values[j] + reference[i]) - newReference[i]
+ *
+ *
+ * @param op The operation to apply on the dictionary values.
+ * @param v The values to use on the left side of the operator.
+ * @param colIndexes The column indexes to use.
+ * @param reference The reference value to add before operator.
+ * @param newReference The reference value to subtract after operator.
+ * @return A new dictionary.
+ */
+ public abstract ADictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes, double[] reference,
+ double[] newReference);
+
 /**
 * Apply binary row operation on the right side.
 *
@@ -119,6 +196,22 @@ public abstract class ADictionary implements Serializable {
 */
 public abstract ADictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes);
+ /**
+ * Apply the binary operator such that each value is offset by the reference before application. Then put the result
+ * into the new dictionary, but offset it by the new reference.
+ *
+ * outValues[j] = op(this.values[j] + reference[i], v[colIndexes[i]]) - newReference[i]
+ *
+ * @param op The operation to apply on the dictionary values.
+ * @param v The values to use on the right side of the operator.
+ * @param colIndexes The column indexes to use.
+ * @param reference The reference value to add before operator.
+ * @param newReference The reference value to subtract after operator.
+ * @return A new dictionary.
+ */
+ public abstract ADictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes, double[] reference,
+ double[] newReference);
+
 /**
 * Apply binary row operation on the left side and allocate a new dictionary.
 *
@@ -131,7 +224,6 @@ public abstract class ADictionary implements Serializable {
 */
 public abstract ADictionary applyBinaryRowOpLeftAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes);
-
 /**
 * Apply binary row operation on this dictionary on the right side.
* @@ -155,16 +247,6 @@ public abstract class ADictionary implements Serializable { */ public abstract ADictionary cloneAndExtend(int len); - /** - * Aggregates the columns into the target double array provided. - * - * @param c The target double array, this contains the full number of columns, therefore the colIndexes for - * this specific dictionary is needed. - * @param fn The function to apply to individual columns - * @param colIndexes The mapping to the target columns from the individual columns - */ - public abstract void aggregateCols(double[] c, Builtin fn, int[] colIndexes); - /** * Write the dictionary to a DataOutput. * @@ -200,21 +282,57 @@ public abstract class ADictionary implements Serializable { * * Note if the number of columns is one the actual dictionaries values are simply returned. * - * @param square If each entry should be squared. + * + * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. + * @return a double array containing the row sums from this dictionary. + */ + public abstract double[] sumAllRowsToDouble(int nrColumns); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. + * + * Note if the number of columns is one the actual dictionaries values are simply returned. + * * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. * @return a double array containing the row sums from this dictionary. */ - public abstract double[] sumAllRowsToDouble(boolean square, int nrColumns); + public abstract double[] sumAllRowsToDoubleSq(int nrColumns); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. + * + * @param reference The reference values to add to each cell. + * @return a double array containing the row sums from this dictionary. + */ + public abstract double[] sumAllRowsToDoubleSq(double[] reference); /** * Sum the values at a specific row. * * @param k The row index to sum - * @param square If each entry should be squared. * @param nrColumns The number of columns * @return The sum of the row. */ - public abstract double sumRow(int k, boolean square, int nrColumns); + public abstract double sumRow(int k, int nrColumns); + + /** + * Sum the values at a specific row. + * + * @param k The row index to sum + * @param nrColumns The number of columns + * @return The sum of the row. + */ + public abstract double sumRowSq(int k, int nrColumns); + + /** + * Sum the values at a specific row, with a reference array to scale the values. + * + * @param k The row index to sum + * @param nrColumns The number of columns + * @param reference The reference vector to add to each cell processed. + * @return The sum of the row. + */ + public abstract double sumRowSq(int k, int nrColumns, double[] reference); /** * get the column sum of this dictionary only. @@ -232,9 +350,29 @@ public abstract class ADictionary implements Serializable { * @param counts The counts of the individual tuples. * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into * the c output. 
- * @param square Specify if the values should be squared */ - public abstract void colSum(double[] c, int[] counts, int[] colIndexes, boolean square); + public abstract void colSum(double[] c, int[] counts, int[] colIndexes); + + /** + * Get the column sum of the values contained in the dictionary + * + * @param c The output array allocated to contain all column groups output. + * @param counts The counts of the individual tuples. + * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into + * the c output. + */ + public abstract void colSumSq(double[] c, int[] counts, int[] colIndexes); + + /** + * Get the column sum of the values contained in the dictionary with an offset reference value added to each cell. + * + * @param c The output array allocated to contain all column groups output. + * @param counts The counts of the individual tuples. + * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into + * the c output. + * @param reference The reference values to add to each cell. + */ + public abstract void colSumSq(double[] c, int[] counts, int[] colIndexes, double[] reference); /** * Get the sum of the values contained in the dictionary @@ -252,7 +390,16 @@ public abstract class ADictionary implements Serializable { * @param nCol The number of columns contained * @return The square sum scaled by the counts provided. */ - public abstract double sumsq(int[] counts, int nCol); + public abstract double sumSq(int[] counts, int nCol); + + /** + * Get the square sum of the values contained in the dictionary with a reference offset on each value. + * + * @param counts The counts of the individual tuples + * @param reference The reference value + * @return The square sum scaled by the counts and reference. + */ + public abstract double sumSq(int[] counts, double[] reference); /** * Get a string representation of the dictionary, that considers the layout of the data. @@ -298,6 +445,15 @@ public abstract class ADictionary implements Serializable { */ public abstract boolean containsValue(double pattern); + /** + * Detect if the dictionary contains a specific value with reference offset. + * + * @param pattern The pattern/ value to search for + * @param reference The reference double array. + * @return true if the value is contained else false. + */ + public abstract boolean containsValue(double pattern, double[] reference); + /** * Calculate the number of non zeros in the dictionary. The number of non zeros should be scaled with the counts * given. This gives the exact number of non zero values in the parent column group. @@ -308,6 +464,20 @@ public abstract class ADictionary implements Serializable { */ public abstract long getNumberNonZeros(int[] counts, int nCol); + /** + * Calculate the number of non zeros in the dictionary. + * + * Each value in the dictionary should be added to the reference value. + * + * The number of non zeros should be scaled with the given counts. + * + * @param counts The Counts of each dict entry. + * @param reference The reference vector. + * @param nRows The number of rows in the input. + * @return The NonZero Count. 
+ */ + public abstract long getNumberNonZeros(int[] counts, double[] reference, int nRows); + /** * Copies and adds the dictionary entry from this dictionary to the d dictionary * @@ -380,6 +550,8 @@ public abstract ADictionary preaggValuesFromDense(final int numVals, final int[] */ public abstract ADictionary replace(double pattern, double replace, int nCol); + public abstract ADictionary replace(double pattern, double replace, double[] reference); + public abstract ADictionary replaceZeroAndExtend(double replace, int nCol); public abstract double product(int[] counts, int nCol); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java index 3707de70fd0..8f9a91b287e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java @@ -80,7 +80,19 @@ public double aggregate(double init, Builtin fn) { } @Override - public double[] aggregateTuples(Builtin fn, final int nCol) { + public double aggregate(double init, Builtin fn, double[] reference) { + final int nCol = reference.length; + double ret = init; + for(int i = 0; i < _values.length; i++) + ret = fn.execute(ret, _values[i] + reference[i % nCol]); + + for(int i = 0; i < nCol; i++) + ret = fn.execute(ret, reference[i]); + return ret; + } + + @Override + public double[] aggregateRows(Builtin fn, int nCol) { if(nCol == 1) return _values; final int nRows = _values.length / nCol; @@ -94,9 +106,48 @@ public double[] aggregateTuples(Builtin fn, final int nCol) { return res; } + @Override + public double[] aggregateRows(Builtin fn, double[] reference) { + final int nCol = reference.length; + final int nRows = _values.length / nCol; + double[] res = new double[nRows + 1]; + int off = 0; + for(int i = 0; i < nRows; i++) { + res[i] = _values[off++] + reference[0]; + for(int j = 1; j < nCol; j++) + res[i] = fn.execute(res[i], _values[off++] + reference[j]); + } + res[nRows] = reference[0]; + for(int i = 0; i < nCol; i++) + res[nRows] = fn.execute(res[nRows], reference[i]); + return res; + } + + @Override + public Dictionary applyScalarOp(ScalarOperator op) { + final double[] retV = new double[_values.length]; + for(int i = 0; i < _values.length; i++) + retV[i] = op.executeScalar(_values[i]); + return new Dictionary(retV); + } + + @Override + public Dictionary applyScalarOp(ScalarOperator op, double[] reference, double[] newReference) { + final double[] retV = new double[_values.length]; + final int nCol = reference.length; + final int nRow = _values.length / nCol; + int off = 0; + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + retV[off] = op.executeScalar(_values[off] + reference[j]) - newReference[j]; + off++; + } + } + return new Dictionary(retV); + } + @Override public Dictionary inplaceScalarOp(ScalarOperator op) { - // in-place modification of the dictionary int len = size(); for(int i = 0; i < len; i++) _values[i] = op.executeScalar(_values[i]); @@ -125,6 +176,23 @@ public Dictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes) { return new Dictionary(retVals); } + @Override + public Dictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + final ValueFunction fn = op.fn; + final double[] retV = new double[_values.length]; + final int nCol = reference.length; + final int nRow = _values.length / nCol; + 
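/*
 * Worked example of the reference/newReference algebra used below: a stored delta d materializes
 * as d + reference[j], and the result is re-normalized into delta form against newReference[j]:
 *
 *   out = fn(d + reference[j], v[colIndexes[j]]) - newReference[j]
 *
 * e.g. d = 2, reference[j] = 3, v[colIndexes[j]] = 10, fn = plus, newReference[j] = 13:
 *   fn(2 + 3, 10) - 13 = 2, so the stored delta is unchanged while the materialized
 * value moves from 5 to 15, consistent with the new reference.
 */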
int off = 0; + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + retV[off] = fn.execute(_values[off] + reference[j], v[colIndexes[j]]) - newReference[j]; + off++; + } + } + return new Dictionary(retV); + } + @Override public final Dictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes) { final ValueFunction fn = op.fn; @@ -136,9 +204,26 @@ public final Dictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexe return new Dictionary(retVals); } + @Override + public Dictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + final ValueFunction fn = op.fn; + final double[] retV = new double[_values.length]; + final int nCol = reference.length; + final int nRow = _values.length / nCol; + int off = 0; + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + retV[off] = fn.execute(v[colIndexes[j]], _values[off] + reference[j]) - newReference[j]; + off++; + } + } + return new Dictionary(retV); + } + @Override public Dictionary applyBinaryRowOpRightAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { - ValueFunction fn = op.fn; + final ValueFunction fn = op.fn; final int len = size(); final int lenV = colIndexes.length; final double[] values = new double[len + lenV]; @@ -152,7 +237,7 @@ public Dictionary applyBinaryRowOpRightAppendNewEntry(BinaryOperator op, double[ @Override public final Dictionary applyBinaryRowOpLeftAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { - ValueFunction fn = op.fn; + final ValueFunction fn = op.fn; final int len = size(); final int lenV = colIndexes.length; final double[] values = new double[len + lenV]; @@ -207,34 +292,67 @@ public int getNumberOfValues(int nCol) { } @Override - public double[] sumAllRowsToDouble(boolean square, int nrColumns) { - if(nrColumns == 1 && !square) + public double[] sumAllRowsToDouble(int nrColumns) { + if(nrColumns == 1) return getValues(); // shallow copy of values // pre-aggregate value tuple final int numVals = getNumberOfValues(nrColumns); double[] ret = new double[numVals]; - for(int k = 0; k < numVals; k++) { - ret[k] = sumRow(k, square, nrColumns); - } + for(int k = 0; k < numVals; k++) + ret[k] = sumRow(k, nrColumns); + + return ret; + } + + @Override + public double[] sumAllRowsToDoubleSq(int nrColumns) { + // pre-aggregate value tuple + final int numVals = getNumberOfValues(nrColumns); + double[] ret = new double[numVals]; + for(int k = 0; k < numVals; k++) + ret[k] = sumRowSq(k, nrColumns); return ret; } @Override - public double sumRow(int k, boolean square, int nrColumns) { + public double[] sumAllRowsToDoubleSq(double[] reference) { + final int nCol = reference.length; + final int numVals = getNumberOfValues(nCol); + double[] ret = new double[numVals + 1]; + for(int k = 0; k < numVals; k++) + ret[k] = sumRowSq(k, nCol, reference); + for(int i = 0; i < nCol; i++) + ret[numVals] += reference[i] * reference[i]; + return ret; + } - int valOff = k * nrColumns; + @Override + public double sumRow(int k, int nrColumns) { + final int valOff = k * nrColumns; double res = 0.0; - if(!square) { - for(int i = 0; i < nrColumns; i++) { - res += _values[valOff + i]; - } - } - else { - // kSquare - for(int i = 0; i < nrColumns; i++) - res += _values[valOff + i] * _values[valOff + i]; + for(int i = 0; i < nrColumns; i++) + res += _values[valOff + i]; + return res; + } + + @Override + public double sumRowSq(int k, int nrColumns) { + final int valOff = k * nrColumns; + double res = 0.0; + for(int i = 0; 
i < nrColumns; i++) + res += _values[valOff + i] * _values[valOff + i]; + return res; + } + + @Override + public double sumRowSq(int k, int nrColumns, double[] reference) { + final int valOff = k * nrColumns; + double res = 0.0; + for(int i = 0; i < nrColumns; i++) { + final double v = _values[valOff + i] + reference[i]; + res += v * v; } return res; } @@ -252,44 +370,89 @@ public double[] colSum(int[] counts, int nCol) { } @Override - public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) { - for(int k = 0; k < _values.length / colIndexes.length; k++) { + public void colSum(double[] c, int[] counts, int[] colIndexes) { + final int nCol = colIndexes.length; + for(int k = 0; k < _values.length / nCol; k++) { final int cntk = counts[k]; - for(int j = 0; j < colIndexes.length; j++) { - double v = _values[k * colIndexes.length + j]; - if(square) - c[colIndexes[j]] += v * v * cntk; - else - c[colIndexes[j]] += v * cntk; + final int off = k * nCol; + for(int j = 0; j < nCol; j++) + c[colIndexes[j]] += _values[off + j] * cntk; + } + } + + @Override + public void colSumSq(double[] c, int[] counts, int[] colIndexes) { + final int nCol = colIndexes.length; + final int nRow = _values.length / nCol; + int off = 0; + for(int k = 0; k < nRow; k++) { + final int cntk = counts[k]; + for(int j = 0; j < nCol; j++) { + final double v = _values[off++]; + c[colIndexes[j]] += v * v * cntk; } } + } + @Override + public void colSumSq(double[] c, int[] counts, int[] colIndexes, double[] reference) { + final int nCol = colIndexes.length; + final int nRow = _values.length / nCol; + int off = 0; + for(int k = 0; k < nRow; k++) { + final int cntk = counts[k]; + for(int j = 0; j < nCol; j++) { + final double v = _values[off++] + reference[j]; + c[colIndexes[j]] += v * v * cntk; + } + } + for(int i = 0; i < nCol; i++) + c[colIndexes[i]] += reference[i] * reference[i] * counts[nRow]; } @Override - public double sum(int[] counts, int ncol) { + public double sum(int[] counts, int nCol) { double out = 0; int valOff = 0; - for(int k = 0; k < _values.length / ncol; k++) { + for(int k = 0; k < _values.length / nCol; k++) { int countK = counts[k]; - for(int j = 0; j < ncol; j++) { - out += getValue(valOff++) * countK; + for(int j = 0; j < nCol; j++) { + out += _values[valOff++] * countK; } } return out; } @Override - public double sumsq(int[] counts, int ncol) { + public double sumSq(int[] counts, int nCol) { double out = 0; int valOff = 0; - for(int k = 0; k < _values.length / ncol; k++) { - int countK = counts[k]; - for(int j = 0; j < ncol; j++) { - double val = getValue(valOff++); + for(int k = 0; k < _values.length / nCol; k++) { + final int countK = counts[k]; + for(int j = 0; j < nCol; j++) { + final double val = _values[valOff++]; + out += val * val * countK; + } + } + return out; + } + + @Override + public double sumSq(int[] counts, double[] reference) { + final int nCol = reference.length; + final int nRow = _values.length / nCol; + double out = 0; + int valOff = 0; + for(int k = 0; k < nRow; k++) { + final int countK = counts[k]; + for(int j = 0; j < nCol; j++) { + final double val = _values[valOff++] + reference[j]; out += val * val * countK; } } + for(int i = 0; i < nCol; i++) + out += reference[i] * reference[i] * counts[nRow]; + return out; } @@ -383,6 +546,15 @@ public boolean containsValue(double pattern) { return false; } + @Override + public boolean containsValue(double pattern, double[] reference) { + final int nCol = reference.length; + for(int i = 0; i < _values.length; i++) + 
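/*
 * With a reference, _values holds deltas: cell i materializes as _values[i] + reference[i % nCol],
 * so the pattern is tested against the offset value, not the raw delta.
 */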
if(_values[i] + reference[i % nCol] == pattern) + return true; + return false; + } + @Override public long getNumberNonZeros(int[] counts, int nCol) { long nnz = 0; @@ -399,6 +571,27 @@ public long getNumberNonZeros(int[] counts, int nCol) { return nnz; } + @Override + public long getNumberNonZeros(int[] counts, double[] reference, int nRows) { + long nnz = 0; + final int nCol = reference.length; + final int nRow = _values.length / nCol; + for(int i = 0; i < nRow; i++) { + long rowCount = 0; + final int off = i * nCol; + for(int j = off, jj = 0; j < off + nCol; j++, jj++) { + if(_values[j] + reference[jj] != 0) + rowCount++; + } + nnz += rowCount * counts[i]; + } + for(int i = 0; i < nCol; i++) + if(reference[i] != 0) + nnz += counts[nRow]; + + return nnz; + } + @Override public void addToEntry(Dictionary d, int fr, int to, int nCol) { final int sf = nCol * fr; // start from @@ -446,12 +639,22 @@ public MatrixBlockDictionary getMBDict(int nCol) { @Override public void aggregateCols(double[] c, Builtin fn, int[] colIndexes) { - int ncol = colIndexes.length; - int vlen = size() / ncol; - for(int k = 0; k < vlen; k++) - for(int j = 0, valOff = k * ncol; j < ncol; j++) - c[colIndexes[j]] = fn.execute(c[colIndexes[j]], getValue(valOff + j)); + final int nCol = colIndexes.length; + final int rlen = _values.length / nCol; + for(int k = 0; k < rlen; k++) + for(int j = 0, valOff = k * nCol; j < nCol; j++) + c[colIndexes[j]] = fn.execute(c[colIndexes[j]], _values[valOff + j]); + } + @Override + public void aggregateCols(double[] c, Builtin fn, int[] colIndexes, double[] reference) { + final int nCol = reference.length; + final int rlen = _values.length / nCol; + for(int k = 0; k < rlen; k++) + for(int j = 0, valOff = k * nCol; j < nCol; j++) + c[colIndexes[j]] = fn.execute(c[colIndexes[j]], _values[valOff + j] + reference[j]); + for(int i = 0; i < nCol; i++) + c[colIndexes[i]] = fn.execute(c[colIndexes[i]], reference[i]); } @Override @@ -488,10 +691,23 @@ public ADictionary replace(double pattern, double replace, int nCol) { double[] retV = new double[_values.length]; for(int i = 0; i < _values.length; i++) { final double v = _values[i]; - if(v == pattern) - retV[i] = replace; - else - retV[i] = v; + retV[i] = v == pattern ? replace : v; + } + return new Dictionary(retV); + } + + @Override + public ADictionary replace(double pattern, double replace, double[] reference) { + final double[] retV = new double[_values.length]; + final int nCol = reference.length; + final int nRow = _values.length / nCol; + int off = 0; + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + final double v = _values[off]; + retV[off++] = v + reference[j] == pattern ? 
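/* a match must materialize as `replace`, so the stored delta becomes replace - reference[j] */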
replace - reference[j] : v; + + } } return new Dictionary(retV); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java index 1db433c5c29..982c3c903c6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java @@ -171,6 +171,8 @@ public static ADictionary moveFrequentToLastDictionaryEntry(ADictionary dict, AB else if(mb.isInSparseFormat()) { MatrixBlockDictionary mbdn = moveToLastDictionaryEntrySparse(mb.getSparseBlock(), largestIndex, zeros, nCol, largestIndexSize); + if(mbdn == null) + return null; MatrixBlock mbn = mbdn.getMatrixBlock(); mbn.setNonZeros(mb.getNonZeros()); if(mbn.getNonZeros() == 0) @@ -196,6 +198,8 @@ private static MatrixBlockDictionary moveToLastDictionaryEntrySparse(SparseBlock for(int i = indexToMove + 1; i < sb.numRows(); i++) sb.set(i - 1, sb.get(i), false); sb.set(sb.numRows() - 1, swap, false); + if(ret.isEmpty()) + return null; return new MatrixBlockDictionary(ret); } @@ -214,6 +218,8 @@ private static MatrixBlockDictionary moveToLastDictionaryEntrySparse(SparseBlock for(int i = indexToMove + 1; i < sb.numRows(); i++) retB.set(i - 1, sb.get(i), false); } + if(ret.isEmpty()) + return null; return new MatrixBlockDictionary(ret); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java index b3fa6f7e09f..b9fc6868ea6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java @@ -25,6 +25,7 @@ import java.util.Arrays; import org.apache.commons.lang.NotImplementedException; +import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.utils.Util; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.DenseBlockFP64; @@ -45,10 +46,14 @@ public class MatrixBlockDictionary extends ADictionary { public MatrixBlockDictionary(double[] values, int nCol) { _data = Util.matrixBlockFromDenseArray(values, nCol); + if(_data.isEmpty()) + throw new DMLCompressionException("Invalid construction of empty dictionary"); } public MatrixBlockDictionary(MatrixBlock data) { _data = data; + if(_data.isEmpty()) + throw new DMLCompressionException("Invalid construction of empty dictionary"); } public MatrixBlock getMatrixBlock() { @@ -93,7 +98,45 @@ else if(fn.getBuiltinCode() == BuiltinCode.MIN) } @Override - public double[] aggregateTuples(Builtin fn, int nCol) { + public double aggregate(double init, Builtin fn, double[] reference) { + final int nCol = reference.length; + final int nRows = _data.getNumRows(); + double ret = init; + + for(int i = 0; i < nCol; i++) + ret = fn.execute(ret, reference[i]); + + if(!_data.isEmpty() && _data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < nRows; i++) { + if(sb.isEmpty(i)) + continue; + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + for(int k = apos; k < alen; k++) { + final double v = avals[k] + reference[aix[k]]; + ret = fn.execute(ret, v); + } + } + } + else 
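/*
 * Note on the sparse branch above: ret was pre-folded with every bare reference[i], which is exactly
 * the materialized value of an implicitly-zero cell, so the scan only needs the stored entries.
 * This is sound because fn is an order statistic (Builtin min/max), where folding a value once
 * covers any number of occurrences; it would undercount for additive aggregates.
 */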
if(!_data.isEmpty()) { + final double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < nRows; k++) { + for(int j = 0; j < _data.getNumColumns(); j++) { + final double v = values[off++] + reference[j]; + ret = fn.execute(ret, v); + } + } + } + + return ret; + } + + @Override + public double[] aggregateRows(Builtin fn, int nCol) { double[] ret = new double[_data.getNumRows()]; if(_data.isEmpty()) return ret; @@ -129,6 +172,53 @@ else if(nCol == 1) return ret; } + @Override + public double[] aggregateRows(Builtin fn, double[] reference) { + final int nCol = reference.length; + final int nRows = _data.getNumRows(); + final double[] ret = new double[nRows + 1]; + + ret[nRows] = reference[0]; + for(int i = 1; i < nCol; i++) + ret[nRows] = fn.execute(ret[nRows], reference[i]); + + if(!_data.isEmpty() && _data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < nRows; i++) { + if(sb.isEmpty(i)) + ret[i] = ret[nRows]; + else { + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int k = apos; + int j = 1; + ret[i] = (aix[k] == 0) ? avals[k++] + reference[0] : reference[0]; + for(; j < _data.getNumColumns() && k < alen; j++) { + final double v = aix[k] == j ? avals[k++] + reference[j] : reference[j]; + ret[i] = fn.execute(ret[i], v); + } + for(; j < _data.getNumColumns(); j++) + ret[i] = fn.execute(ret[i], reference[j]); + } + } + } + else if(!_data.isEmpty()) { + final double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < nRows; k++) { + ret[k] = values[off++] + reference[0]; + for(int j = 1; j < _data.getNumColumns(); j++) { + final double v = values[off++] + reference[j]; + ret[k] = fn.execute(ret[k], v); + } + } + } + + return ret; + } + @Override public void aggregateCols(double[] c, Builtin fn, int[] colIndexes) { if(_data.isEmpty()) { @@ -172,9 +262,102 @@ else if(_data.isInSparseFormat()) { } @Override - public ADictionary inplaceScalarOp(ScalarOperator op) { + public void aggregateCols(double[] c, Builtin fn, int[] colIndexes, double[] reference) { + final int nCol = _data.getNumColumns(); + final int nRow = _data.getNumRows(); + + for(int j = 0; j < colIndexes.length; j++) { + final int idx = colIndexes[j]; + c[idx] = fn.execute(c[idx], reference[j]); + } + if(!_data.isEmpty() && _data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < nRow; i++) { + if(sb.isEmpty(i)) + continue; + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final double[] avals = sb.values(i); + final int[] aix = sb.indexes(i); + // This is a cool trick but it only works with min / max. 
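/*
 * The trick: c[colIndexes[j]] was seeded with fn(c, reference[j]) for every column, which equals
 * the contribution of any implicitly-zero sparse cell. Skipping empty rows and missing entries is
 * therefore safe for idempotent functions (fn(fn(x, r), r) == fn(x, r)) such as min/max, but not
 * for sums.
 */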
+ for(int k = apos; k < alen; k++) { + final int idx = colIndexes[aix[k]]; + c[idx] = fn.execute(c[idx], avals[k] + reference[aix[k]]); + } + } + } + else if(!_data.isEmpty()) { + final double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < nRow; k++) { + for(int j = 0; j < nCol; j++) { + final int idx = colIndexes[j]; + c[idx] = fn.execute(c[idx], values[off++] + reference[j]); + } + } + } + } + + @Override + public ADictionary applyScalarOp(ScalarOperator op) { MatrixBlock res = _data.scalarOperations(op, new MatrixBlock()); - return new MatrixBlockDictionary(res); + if(res.isEmpty()) + return null; + else + return new MatrixBlockDictionary(res); + } + + @Override + public ADictionary applyScalarOp(ScalarOperator op, double[] reference, double[] newReference) { + final int nCol = _data.getNumColumns(); + final int nRow = _data.getNumRows(); + final MatrixBlock ret = new MatrixBlock(nRow, nCol, false); + ret.allocateDenseBlock(); + final double[] retV = ret.getDenseBlockValues(); + int off = 0; + if(_data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < nRow; i++) { + if(sb.isEmpty(i)) + for(int j = 0; j < nCol; j++) + retV[off++] = op.executeScalar(reference[j]) - newReference[j]; + else { + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int j = 0; + for(int k = apos; j < nCol && k < alen; j++) { + final double v = aix[k] == j ? avals[k++] + reference[j] : reference[j]; + retV[off++] = op.executeScalar(v) - newReference[j]; + } + for(; j < nCol; j++) + retV[off++] = op.executeScalar(reference[j]) - newReference[j]; + } + } + } + else { + final double[] values = _data.getDenseBlockValues(); + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + retV[off] = op.executeScalar(values[off] + reference[j]) - newReference[j]; + off++; + } + } + } + + ret.recomputeNonZeros(); + ret.examSparsity(); + if(ret.isEmpty()) + return null; + else + return new MatrixBlockDictionary(ret); + + } + + @Override + public ADictionary inplaceScalarOp(ScalarOperator op) { + throw new NotImplementedException(); } @Override @@ -182,15 +365,16 @@ public ADictionary applyScalarOp(ScalarOperator op, double newVal, int numCols) MatrixBlock res = _data.scalarOperations(op, new MatrixBlock()); final int lastRow = res.getNumRows(); MatrixBlock res2 = new MatrixBlock(lastRow + 1, res.getNumColumns(), true); - if(res.isEmpty()) { + if(res.isEmpty()) for(int i = 0; i < numCols; i++) res2.appendValue(lastRow, i, newVal); - return new MatrixBlockDictionary(res2); - } - else { + else res.append(new MatrixBlock(1, numCols, newVal), res2, false); + + if(res2.isEmpty()) + return null; + else return new MatrixBlockDictionary(res2); - } } @Override @@ -199,6 +383,12 @@ public ADictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes) { return new MatrixBlockDictionary(rowVector.binaryOperations(op, _data, null)); } + @Override + public Dictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + throw new NotImplementedException(); + } + @Override public ADictionary applyBinaryRowOpLeftAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { MatrixBlock rowVector = Util.extractValues(v, colIndexes); @@ -212,6 +402,12 @@ public ADictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes) { return new MatrixBlockDictionary(_data.binaryOperations(op, rowVector, null)); 
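/*
 * Util.extractValues is assumed here to gather v at this group's columns into a 1 x nCol row
 * vector, roughly:
 *
 *   double[] row = new double[colIndexes.length];
 *   for(int j = 0; j < colIndexes.length; j++)
 *     row[j] = v[colIndexes[j]];
 *
 * binaryOperations then broadcasts that row vector across every dictionary tuple.
 */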
} + @Override + public Dictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + throw new NotImplementedException(); + } + @Override public ADictionary applyBinaryRowOpRightAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { MatrixBlock rowVector = Util.extractValues(v, colIndexes); @@ -242,7 +438,7 @@ public int getNumberOfValues(int ncol) { } @Override - public double[] sumAllRowsToDouble(boolean square, int nrColumns) { + public double[] sumAllRowsToDouble(int nrColumns) { double[] ret = new double[_data.getNumRows()]; if(_data.isEmpty()) @@ -255,7 +451,7 @@ else if(_data.isInSparseFormat()) { final int alen = sb.size(i) + apos; final double[] avals = sb.values(i); for(int j = apos; j < alen; j++) { - ret[i] += (square) ? avals[j] * avals[j] : avals[j]; + ret[i] += avals[j]; } } } @@ -266,7 +462,7 @@ else if(_data.isInSparseFormat()) { for(int k = 0; k < _data.getNumRows(); k++) { for(int j = 0; j < _data.getNumColumns(); j++) { final double v = values[off++]; - ret[k] += (square) ? v * v : v; + ret[k] += v; } } } @@ -274,7 +470,95 @@ else if(_data.isInSparseFormat()) { } @Override - public double sumRow(int k, boolean square, int nrColumns) { + public double[] sumAllRowsToDoubleSq(int nrColumns) { + final double[] ret = new double[_data.getNumRows()]; + + if(_data.isEmpty()) + return ret; + else if(_data.isInSparseFormat()) { + SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < _data.getNumRows(); i++) { + if(!sb.isEmpty(i)) { + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final double[] avals = sb.values(i); + for(int j = apos; j < alen; j++) { + ret[i] += avals[j] * avals[j]; + } + } + } + } + else { + double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < _data.getNumRows(); k++) { + for(int j = 0; j < _data.getNumColumns(); j++) { + final double v = values[off++]; + ret[k] += v * v; + } + } + } + return ret; + } + + @Override + public double[] sumAllRowsToDoubleSq(double[] reference) { + final int nCol = reference.length; + final int numVals = _data.getNumRows(); + final double[] ret = new double[numVals + 1]; + + final int finalIndex = numVals; + for(int i = 0; i < nCol; i++) + ret[finalIndex] += reference[i] * reference[i]; + + if(!_data.isEmpty() && _data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < numVals; i++) { + if(sb.isEmpty(i)) + ret[i] = ret[finalIndex]; + else { + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int k = apos; + int j = 0; + for(; j < _data.getNumColumns() && k < alen; j++) { + final double v = aix[k] == j ? 
avals[k++] + reference[j] : reference[j];
+						ret[i] += v * v;
+					}
+					for(; j < _data.getNumColumns(); j++)
+						ret[i] += reference[j] * reference[j];
+				}
+
+			}
+		}
+		else if(!_data.isEmpty()) {
+			double[] values = _data.getDenseBlockValues();
+			int off = 0;
+			for(int k = 0; k < numVals; k++) {
+				for(int j = 0; j < _data.getNumColumns(); j++) {
+					final double v = values[off++] + reference[j];
+					ret[k] += v * v;
+				}
+			}
+		}
+
+		return ret;
+	}
+
+	@Override
+	public double sumRow(int k, int nrColumns) {
+		throw new NotImplementedException();
+	}
+
+	@Override
+	public double sumRowSq(int k, int nrColumns) {
+		throw new NotImplementedException();
+	}
+
+	@Override
+	public double sumRowSq(int k, int nrColumns, double[] reference) {
 		throw new NotImplementedException();
 	}
 
@@ -314,7 +598,40 @@ public double[] colSum(int[] counts, int nCol) {
 	}
 
 	@Override
-	public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) {
+	public void colSum(double[] c, int[] counts, int[] colIndexes) {
+		if(_data.isEmpty())
+			return;
+		if(_data.isInSparseFormat()) {
+			SparseBlock sb = _data.getSparseBlock();
+			for(int i = 0; i < _data.getNumRows(); i++) {
+				if(!sb.isEmpty(i)) {
+					final int count = counts[i];
+					final int apos = sb.pos(i);
+					final int alen = sb.size(i) + apos;
+					final int[] aix = sb.indexes(i);
+					final double[] avals = sb.values(i);
+					for(int j = apos; j < alen; j++) {
+						c[colIndexes[aix[j]]] += count * avals[j];
+					}
+				}
+			}
+		}
+		else {
+			double[] values = _data.getDenseBlockValues();
+			int off = 0;
+			for(int k = 0; k < _data.getNumRows(); k++) {
+				final int countK = counts[k];
+				for(int j = 0; j < _data.getNumColumns(); j++) {
+					final double v = values[off++];
+					c[colIndexes[j]] += v * countK;
+				}
+			}
+		}
+	}
+
+	@Override
+	public void colSumSq(double[] c, int[] counts, int[] colIndexes) {
 		if(_data.isEmpty())
 			return;
 		if(_data.isInSparseFormat()) {
@@ -328,7 +645,7 @@ public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) {
 			final int[] aix = sb.indexes(i);
 			final double[] avals = sb.values(i);
 			for(int j = apos; j < alen; j++) {
-				c[colIndexes[aix[j]]] += square ? count * avals[j] * avals[j] : count * avals[j];
+				c[colIndexes[aix[j]]] += count * avals[j] * avals[j];
 			}
 		}
 	}
@@ -340,7 +657,50 @@ public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) {
 			final int countK = counts[k];
 			for(int j = 0; j < _data.getNumColumns(); j++) {
 				final double v = values[off++];
-				c[colIndexes[j]] += square ? v * v * countK : v * countK;
+				c[colIndexes[j]] += v * v * countK;
+			}
+		}
+	}
+}
+
+@Override
+public void colSumSq(double[] c, int[] counts, int[] colIndexes, double[] reference) {
+	final int nCol = reference.length;
+	final int nRow = _data.getNumRows();
+	for(int i = 0; i < nCol; i++)
+		c[colIndexes[i]] += reference[i] * reference[i] * counts[nRow];
+
+	if(!_data.isEmpty() && _data.isInSparseFormat()) {
+		final SparseBlock sb = _data.getSparseBlock();
+		for(int i = 0; i < nRow; i++) {
+			final int countK = counts[i];
+			if(sb.isEmpty(i))
+				for(int j = 0; j < nCol; j++)
+					c[colIndexes[j]] += reference[j] * reference[j] * countK;
+			else {
+				final int apos = sb.pos(i);
+				final int alen = sb.size(i) + apos;
+				final int[] aix = sb.indexes(i);
+				final double[] avals = sb.values(i);
+				int k = apos;
+				int j = 0;
+				for(; j < _data.getNumColumns() && k < alen; j++) {
+					final double v = aix[k] == j ?
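Editor's note: colSum and colSumSq never touch the full matrix. Each dictionary tuple i stands for counts[i] identical rows, so column aggregates reduce to a counts-weighted pass over the distinct tuples. A small sketch of that pattern, assuming a plain double[][] dictionary (illustrative only):

public class CountsWeightedSumSketch {
	static double[] colSumSq(double[][] dict, int[] counts, int nCol) {
		double[] c = new double[nCol];
		for(int i = 0; i < dict.length; i++)
			for(int j = 0; j < nCol; j++)
				// each tuple contributes its squared value once per occurrence
				c[j] += dict[i][j] * dict[i][j] * counts[i];
		return c;
	}

	public static void main(String[] args) {
		double[][] dict = {{1, 2}, {3, 4}};
		int[] counts = {3, 1}; // tuple 0 occurs in 3 rows, tuple 1 in 1 row
		// col 0: 1*1*3 + 3*3*1 = 12 ; col 1: 2*2*3 + 4*4*1 = 28
		System.out.println(java.util.Arrays.toString(colSumSq(dict, counts, 2)));
	}
}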
avals[k++] + reference[j] : reference[j]; + c[colIndexes[j]] += v * v * countK; + } + for(; j < _data.getNumColumns(); j++) + c[colIndexes[j]] += reference[j] * reference[j] * countK; + } + } + } + else if(!_data.isEmpty()) { + double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < nRow; k++) { + final int countK = counts[k]; + for(int j = 0; j < _data.getNumColumns(); j++) { + final double v = values[off++] + reference[j]; + c[colIndexes[j]] += v * v * countK; } } } @@ -380,7 +740,7 @@ public double sum(int[] counts, int ncol) { } @Override - public double sumsq(int[] counts, int ncol) { + public double sumSq(int[] counts, int ncol) { double tmpSum = 0; if(_data.isEmpty()) return tmpSum; @@ -412,6 +772,54 @@ public double sumsq(int[] counts, int ncol) { return tmpSum; } + @Override + public double sumSq(int[] counts, double[] reference) { + final int nCol = reference.length; + final int numVals = _data.getNumRows(); + double ret = 0; + for(int i = 0; i < nCol; i++) + ret += reference[i] * reference[i]; + final double ref = ret; + ret *= counts[numVals]; + + if(!_data.isEmpty() && _data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < numVals; i++) { + final int countK = counts[i]; + if(sb.isEmpty(i)) + ret += ref * countK; + else { + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int k = apos; + int j = 0; + for(; j < _data.getNumColumns() && k < alen; j++) { + final double v = aix[k] == j ? avals[k++] + reference[j] : reference[j]; + ret += v * v * countK; + } + for(; j < _data.getNumColumns(); j++) + ret += reference[j] * reference[j] * countK; + } + + } + } + else if(!_data.isEmpty()) { + double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int k = 0; k < numVals; k++) { + final int countK = counts[k]; + for(int j = 0; j < _data.getNumColumns(); j++) { + final double v = values[off++] + reference[j]; + ret += v * v * countK; + } + } + } + + return ret; + } + @Override public String getString(int colIndexes) { return _data.toString(); @@ -438,6 +846,53 @@ public boolean containsValue(double pattern) { return _data.containsValue(pattern); } + @Override + public boolean containsValue(double pattern, double[] reference) { + + if(_data.isEmpty()) { + for(double d : reference) + if(pattern == d) + return true; + return false; + } + else if(_data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < _data.getNumRows(); i++) { + if(sb.isEmpty(i)) + continue; + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int k = apos; + int j = 0; + for(; j < _data.getNumColumns() && k < alen; j++) { + if(aix[k] == j) { + if(reference[j] + avals[k++] == pattern) + return true; + } + else { + if(reference[j] == pattern) + return true; + } + } + for(; j < _data.getNumColumns(); j++) + if(reference[j] == pattern) + return true; + + } + } + else { + final double[] values = _data.getDenseBlockValues(); + final int nCol = reference.length; + for(int i = 0; i < values.length; i++) + if(values[i] + reference[i % nCol] == pattern) + return true; + + } + return false; + } + @Override public long getNumberNonZeros(int[] counts, int nCol) { if(_data.isEmpty()) @@ -449,7 +904,6 @@ public long getNumberNonZeros(int[] counts, int nCol) { for(int i = 0; i < _data.getNumRows(); i++) if(!sb.isEmpty(i)) nnz += 
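Editor's note: the sparse reference branches above all share one two-pointer merge. Column j walks the dense reference while k walks the row's sparse indexes, and the stored delta contributes only where aix[k] == j; a trailing loop handles the columns past the last sparse entry. A self-contained sketch of the pattern as used by containsValue:

public class SparseReferenceMergeSketch {
	static boolean rowContains(int[] aix, double[] avals, double[] reference, double pattern) {
		int k = 0;
		int j = 0;
		for(; j < reference.length && k < aix.length; j++) {
			// merge: a sparse entry adds its delta on top of the reference
			final double v = aix[k] == j ? avals[k++] + reference[j] : reference[j];
			if(v == pattern)
				return true;
		}
		for(; j < reference.length; j++) // tail: only reference values remain
			if(reference[j] == pattern)
				return true;
		return false;
	}

	public static void main(String[] args) {
		// row deltas: col 1 -> 2.0 ; reference (1, 1, 1) ; true row is (1, 3, 1)
		System.out.println(rowContains(new int[] {1}, new double[] {2}, new double[] {1, 1, 1}, 3.0));
	}
}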
sb.size(i) * counts[i]; - } else { double[] values = _data.getDenseBlockValues(); @@ -467,6 +921,64 @@ public long getNumberNonZeros(int[] counts, int nCol) { return nnz; } + @Override + public long getNumberNonZeros(int[] counts, double[] reference, int nRows) { + long nnz = 0; + for(double d : reference) + if(d != 0) + nnz++; + if(_data.isEmpty()) { + // sum counts + return nnz * nRows; + } + else if(_data.isInSparseFormat()) { + SparseBlock sb = _data.getSparseBlock(); + long emptyRowNNZ = nnz; + nnz *= counts[counts.length - 1]; // multiply count with the common value count in reference. + for(int i = 0; i < _data.getNumRows(); i++) { + if(sb.isEmpty(i)) + nnz += emptyRowNNZ * counts[i]; + else { + int countThis = 0; + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int k = apos; + int j = 0; + for(; j < _data.getNumColumns() && k < alen; j++) { + if(aix[k] == j) { + if(reference[j] + avals[k++] != 0) + countThis++; + } + else { + if(reference[j] != 0) + countThis++; + } + } + for(; j < _data.getNumColumns(); j++) + if(reference[j] != 0) + countThis++; + + nnz += countThis * counts[i]; + } + } + } + else { + nnz *= counts[counts.length - 1]; // multiply count with the common value count in reference. + final double[] values = _data.getDenseBlockValues(); + int off = 0; + for(int i = 0; i < _data.getNumRows(); i++) { + int countThisTuple = 0; + for(int j = 0; j < _data.getNumColumns(); j++) + if(values[off++] + reference[j] != 0) + countThisTuple++; + nnz += countThisTuple * counts[i]; + } + } + return nnz; + } + @Override public void addToEntry(Dictionary d, int fr, int to, int nCol) { double[] v = d.getValues(); @@ -529,6 +1041,8 @@ public ADictionary subtractTuple(double[] tuple) { MatrixBlock rowVector = new MatrixBlock(1, tuple.length, b); MatrixBlock res = new MatrixBlock(_data.getNumColumns(), _data.getNumRows(), _data.isInSparseFormat()); _data.binaryOperations(new BinaryOperator(Minus.getMinusFnObject()), rowVector, res); + if(res.isEmpty()) + return null; return new MatrixBlockDictionary(res); } @@ -645,7 +1159,7 @@ else if(_data.isInSparseFormat()) { DenseBlock dictV = new DenseBlockFP64(new int[] {numVals, aggregateColumns.length}, ret); MatrixBlock dictM = new MatrixBlock(numVals, aggregateColumns.length, dictV); - dictM.getNonZeros(); + dictM.recomputeNonZeros(); dictM.examSparsity(); return new MatrixBlockDictionary(dictM); @@ -653,16 +1167,66 @@ else if(_data.isInSparseFormat()) { @Override public ADictionary replace(double pattern, double replace, int nCol) { - MatrixBlock ret = _data.replaceOperations(new MatrixBlock(), pattern, replace); + final MatrixBlock ret = _data.replaceOperations(new MatrixBlock(), pattern, replace); + if(ret.isEmpty()) + return null; return new MatrixBlockDictionary(ret); } + @Override + public ADictionary replace(double pattern, double replace, double[] reference) { + final int nRow = _data.getNumRows(); + final int nCol = _data.getNumColumns(); + final MatrixBlock ret = new MatrixBlock(nRow, nCol, false); + ret.allocateDenseBlock(); + final double[] retV = ret.getDenseBlockValues(); + int off = 0; + if(_data.isInSparseFormat()) { + final SparseBlock sb = _data.getSparseBlock(); + for(int i = 0; i < nRow; i ++){ + if(sb.isEmpty(i)) + for(int j = 0; j < nCol; j++) + retV[off++] = pattern == reference[j] ? 
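Editor's note: getNumberNonZeros with a reference follows the same counts-weighted idea. The non-zeros of each materialized tuple are counted once and scaled by its occurrence count, and the reference's own non-zeros cover the rows matching no tuple. A sketch with hypothetical names:

public class NnzWithReferenceSketch {
	static long nnz(double[][] dict, int[] counts, double[] reference) {
		long refNnz = 0;
		for(double d : reference)
			if(d != 0)
				refNnz++;
		// counts[dict.length] holds the number of rows matching no tuple
		long nnz = refNnz * counts[dict.length];
		for(int i = 0; i < dict.length; i++) {
			int tupleNnz = 0;
			for(int j = 0; j < reference.length; j++)
				if(dict[i][j] + reference[j] != 0)
					tupleNnz++;
			nnz += (long) tupleNnz * counts[i];
		}
		return nnz;
	}

	public static void main(String[] args) {
		double[][] dict = {{-1, 0}}; // one tuple that cancels column 0
		double[] ref = {1, 0};
		int[] counts = {2, 3}; // tuple in 2 rows, 3 default rows
		System.out.println(nnz(dict, counts, ref)); // 0*2 + 1*3 = 3
	}
}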
replace - reference[j] : 0; + else{ + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final int[] aix = sb.indexes(i); + final double[] avals = sb.values(i); + int j = 0; + for(int k = apos; j < nCol && k < alen; j++){ + final double v = aix[k] == j ? avals[k++] + reference[j] : reference[j]; + retV[off++] = pattern == v ? replace - reference[j] : v - reference[j]; + } + for(; j < nCol; j++) + retV[off++] = pattern == reference[j] ? replace - reference[j] : 0; + } + } + } + else { + final double[] values = _data.getDenseBlockValues(); + for(int i = 0; i < nRow; i++) { + for(int j = 0; j < nCol; j++) { + final double v = values[off]; + retV[off++] = pattern == v + reference[j] ? replace - reference[j] : v; + } + } + } + + ret.recomputeNonZeros(); + ret.examSparsity(); + if(ret.isEmpty()) + return null; + else + return new MatrixBlockDictionary(ret); + + } + @Override public ADictionary replaceZeroAndExtend(double replace, int nCol) { final int nRows = _data.getNumRows(); final int nCols = _data.getNumColumns(); final long nonZerosOut = (nRows + 1) * nCols; - final MatrixBlock ret = new MatrixBlock(_data.getNumRows() + 1, _data.getNumColumns(), false); + final MatrixBlock ret = new MatrixBlock(nRows + 1, nCols, false); ret.allocateBlock(); ret.setNonZeros(nonZerosOut); final double[] retValues = ret.getDenseBlockValues(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java index bfab5275c79..879892a3745 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java @@ -101,7 +101,12 @@ public double aggregate(double init, Builtin fn) { } @Override - public double[] aggregateTuples(Builtin fn, final int nCol) { + public double aggregate(double init, Builtin fn, double[] reference) { + throw new NotImplementedException(); + } + + @Override + public double[] aggregateRows(Builtin fn, final int nCol) { if(nCol == 1) return getValues(); final int nRows = _values.length / nCol; @@ -115,6 +120,11 @@ public double[] aggregateTuples(Builtin fn, final int nCol) { return res; } + @Override + public double[] aggregateRows(Builtin fn, double[] reference) { + throw new NotImplementedException(); + } + @Override public QDictionary inplaceScalarOp(ScalarOperator op) { if(_values == null) @@ -154,6 +164,11 @@ else if(op.fn instanceof Plus) { return this; } + @Override + public QDictionary applyScalarOp(ScalarOperator op) { + throw new NotImplementedException(); + } + @Override public QDictionary applyScalarOp(ScalarOperator op, double newVal, int numCols) { double[] temp = getValues(); @@ -219,39 +234,60 @@ public int getNumberOfValues(int nCol) { } @Override - public double[] sumAllRowsToDouble(boolean square, int nrColumns) { - if(nrColumns == 1 && !square) + public double[] sumAllRowsToDouble(int nrColumns) { + if(nrColumns == 1) return getValues(); // shallow copy of values final int numVals = getNumberOfValues(nrColumns); double[] ret = new double[numVals]; - for(int k = 0; k < numVals; k++) { - ret[k] = sumRow(k, square, nrColumns); - } + for(int k = 0; k < numVals; k++) + ret[k] = sumRow(k, nrColumns); return ret; } @Override - public double sumRow(int k, boolean square, int nrColumns) { + public double[] sumAllRowsToDoubleSq(int nrColumns) { + final int numVals = getNumberOfValues(nrColumns); + double[] ret = new 
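Editor's note: the replace variant keeps everything in delta space. Equality against the pattern is tested on the materialized value delta + reference[j], but what is written back is replace - reference[j], so that adding the reference later reproduces the replacement. A one-cell sketch of the bookkeeping:

public class ReplaceWithReferenceSketch {
	static double replaceCell(double delta, double ref, double pattern, double replace) {
		final double v = delta + ref; // materialized value
		return v == pattern ? replace - ref : delta; // store back in delta space
	}

	public static void main(String[] args) {
		// reference 5, stored delta 2 -> true value 7; replacing 7 with 0
		// stores -5, so 0 is reproduced when the reference is added back.
		System.out.println(replaceCell(2, 5, 7, 0)); // -5
	}
}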
double[numVals]; + for(int k = 0; k < numVals; k++) + ret[k] = sumRowSq(k, nrColumns); + return ret; + } + + @Override + public double[] sumAllRowsToDoubleSq(double[] reference) { + throw new NotImplementedException(); + } + + @Override + public double sumRow(int k, int nrColumns) { if(_values == null) return 0; int valOff = k * nrColumns; - if(!square) { - int res = 0; - for(int i = 0; i < nrColumns; i++) { - res += _values[valOff + i]; - } - return res * _scale; - } - else { - // kSquare - double res = 0.0; - for(int i = 0; i < nrColumns; i++) - res += (int) (_values[valOff + i] * _values[valOff + i]) * _scale * _scale; - return res; + int res = 0; + for(int i = 0; i < nrColumns; i++) { + res += _values[valOff + i]; } + return res * _scale; + + } + + @Override + public double sumRowSq(int k, int nrColumns) { + if(_values == null) + return 0; + int valOff = k * nrColumns; + double res = 0.0; + for(int i = 0; i < nrColumns; i++) + res += (int) (_values[valOff + i] * _values[valOff + i]) * _scale * _scale; + return res; + } + + @Override + public double sumRowSq(int k, int nrColumns, double[] reference) { + throw new NotImplementedException(); } @Override @@ -260,17 +296,32 @@ public double[] colSum(int[] counts, int nCol) { } @Override - public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) { + public void colSum(double[] c, int[] counts, int[] colIndexes) { throw new NotImplementedException("Not Implemented"); } + @Override + public void colSumSq(double[] c, int[] counts, int[] colIndexes) { + throw new NotImplementedException("Not Implemented"); + } + + @Override + public void colSumSq(double[] c, int[] counts, int[] colIndexes, double[] reference) { + throw new NotImplementedException(); + } + @Override public double sum(int[] counts, int ncol) { throw new NotImplementedException("Not Implemented"); } @Override - public double sumsq(int[] counts, int ncol) { + public double sumSq(int[] counts, int ncol) { + throw new NotImplementedException("Not Implemented"); + } + + @Override + public double sumSq(int[] counts, double[] reference) { throw new NotImplementedException("Not Implemented"); } @@ -341,6 +392,11 @@ public boolean containsValue(double pattern) { throw new NotImplementedException("Not contains value on Q Dictionary"); } + @Override + public boolean containsValue(double pattern, double[] reference) { + throw new NotImplementedException(); + } + @Override public long getNumberNonZeros(int[] counts, int nCol) { long nnz = 0; @@ -357,6 +413,11 @@ public long getNumberNonZeros(int[] counts, int nCol) { return nnz; } + @Override + public long getNumberNonZeros(int[] counts, double[] reference, int nRows) { + throw new NotImplementedException("not implemented yet"); + } + @Override public void addToEntry(Dictionary d, int fr, int to, int nCol) { throw new NotImplementedException("Not implemented yet"); @@ -387,6 +448,11 @@ public void aggregateCols(double[] c, Builtin fn, int[] colIndexes) { throw new NotImplementedException(); } + @Override + public void aggregateCols(double[] c, Builtin fn, int[] colIndexes, double[] reference) { + throw new NotImplementedException(); + } + @Override public ADictionary scaleTuples(int[] scaling, int nCol) { throw new NotImplementedException(); @@ -403,6 +469,11 @@ public ADictionary replace(double pattern, double replace, int nCol) { throw new NotImplementedException(); } + @Override + public ADictionary replace(double pattern, double replace, double[] reference) { + throw new NotImplementedException(); + } + @Override 
public ADictionary replaceZeroAndExtend(double replace, int nCol) { throw new NotImplementedException(); @@ -420,25 +491,38 @@ public void colProduct(double[] res, int[] counts, int[] colIndexes) { @Override public ADictionary applyBinaryRowOpLeftAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { - // TODO Auto-generated method stub - return null; + throw new NotImplementedException(); } @Override public ADictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes) { - // TODO Auto-generated method stub - return null; + throw new NotImplementedException(); } @Override public ADictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes) { - // TODO Auto-generated method stub - return null; + throw new NotImplementedException(); } @Override public ADictionary applyBinaryRowOpRightAppendNewEntry(BinaryOperator op, double[] v, int[] colIndexes) { - // TODO Auto-generated method stub - return null; + throw new NotImplementedException(); + } + + @Override + public ADictionary applyScalarOp(ScalarOperator op, double[] reference, double[] newReference) { + throw new NotImplementedException(); + } + + @Override + public ADictionary binOpLeft(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + throw new NotImplementedException(); + } + + @Override + public ADictionary binOpRight(BinaryOperator op, double[] v, int[] colIndexes, double[] reference, + double[] newReference) { + throw new NotImplementedException(); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java index 2d9c5b84308..341268b763b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java @@ -24,7 +24,7 @@ import org.apache.sysds.runtime.compress.utils.IntArrayList; public class MaterializeSort extends AInsertionSorter { - public static int CACHE_BLOCK = 1000; + public static int CACHE_BLOCK = 50000; /** a dense mapToData, that have a value for each row in the input. */ private final AMapToData md; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index d3310fee72b..953ea49d858 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -25,12 +25,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; public abstract class AMapToData implements Serializable { - private static final long serialVersionUID = 100512759972844714L; - protected static final Log LOG = LogFactory.getLog(AMapToData.class.getName()); /** Number of unique values inside this map. */ @@ -44,7 +45,10 @@ public abstract class AMapToData implements Serializable { * @param nUnique number of unique values. 
 	 */
 	protected AMapToData(int nUnique) {
-		this.nUnique = nUnique;
+		if(nUnique + 1 < 0)
+			this.nUnique = Integer.MAX_VALUE;
+		else
+			this.nUnique = nUnique + 1;
 	}
 
 	/**
@@ -145,14 +149,63 @@ protected final void setUnique(int nUnique) {
 	/**
-	 * Pre aggregate a dense matrix m into pre, subject to only including a row segment and column segment.
+	 * Pre-aggregate a dense matrix m into preAV, subject to only including a row segment and column segment.
 	 *
-	 * @param m   The dense matrix values to preaggregate
-	 * @param pre The preAggregate to populate with the summed values of m
-	 * @param rl  The row start in m
-	 * @param ru  The row end in m
-	 * @param cl  The column start in m
-	 * @param cu  The column end in m
+	 * @param m     The dense matrix values to preaggregate
+	 * @param preAV The pre-aggregate double array to populate with the summed values of m
+	 * @param rl    The row start in m
+	 * @param ru    The row end in m
+	 * @param cl    The column start in m
+	 * @param cu    The column end in m
+	 */
+	public final void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu) {
+		final DenseBlock db = m.getDenseBlock();
+		if(rl == ru - 1) {
+			final double[] mV = db.values(rl);
+			final int off = db.pos(rl);
+			preAggregateDenseToRow(mV, off, preAV, cl, cu);
+		}
+		else {
+			preAggregateDenseRows(m, preAV, rl, ru, cl, cu);
+		}
+	}
+
+	protected abstract void preAggregateDenseToRow(double[] mV, int off, double[] preAV, int cl, int cu);
+
+	protected void preAggregateDenseRows(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu) {
+		LOG.warn("Inefficient multi-row pre-aggregation implementation in use");
+		final int nRow = m.getNumColumns();
+		final int nVal = getUnique() - 1;
+		final double[] mV = m.getDenseBlockValues();
+		final int blockSize = 4000;
+		for(int block = cl; block < cu; block += blockSize) {
+			final int blockEnd = Math.min(block + blockSize, nRow);
+			for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) {
+				final int offLeft = rowLeft * nRow;
+				for(int rc = block; rc < blockEnd; rc++) {
+					final int idx = getIndex(rc);
+					preAV[offOut + idx] += mV[offLeft + rc];
+				}
+			}
+		}
+	}
+
+	public abstract void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu,
+		AOffset indexes);
+
+	public abstract void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes);
+
+	/**
+	 * Get the number of occurrences of each unique value contained in this map.
+	 *
+	 * @param counts The count array to populate and return; its last entry accumulates the rows not materialized in
+	 *               this map.
+	 * @param nRows  The number of rows in the calling column group.
+	 * @return The populated counts array.
 	 */
-	public abstract void preAggregateDense(MatrixBlock m, MatrixBlock pre, int rl, int ru, int cl, int cu);
+	public int[] getCounts(int[] counts, int nRows) {
+		final int nonDefaultLength = size();
+		for(int i = 0; i < nonDefaultLength; i++)
+			counts[getIndex(i)]++;
+		counts[counts.length - 1] += nRows - nonDefaultLength;
+		return counts;
+	}
 
 	/**
 	 * Copy the values in this map into another mapping object.
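Editor's note: the nUnique + 1 stored by the constructor and the last slot written by getCounts follow one convention: the extra entry counts the rows that never appear in the mapping, i.e. the default tuple of an SDC-style group. A sketch of that convention with hypothetical names:

public class DefaultCountSketch {
	static int[] getCounts(int[] mapData, int nUnique, int nRows) {
		int[] counts = new int[nUnique + 1];
		for(int d : mapData) // one entry per materialized row
			counts[d]++;
		counts[nUnique] = nRows - mapData.length; // remaining rows are default
		return counts;
	}

	public static void main(String[] args) {
		// 2 distinct tuples over 3 materialized rows, 5 rows total
		System.out.println(java.util.Arrays.toString(getCounts(new int[] {0, 1, 0}, 2, 5)));
		// -> [2, 1, 2]
	}
}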
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
index 678ee65619e..af81dc338a9 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
@@ -24,7 +24,10 @@
 import java.io.IOException;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
+import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -118,24 +121,26 @@ public static MapToBit readFields(DataInput in) throws IOException {
 	}
 
 	@Override
-	public void preAggregateDense(MatrixBlock m, MatrixBlock pre, int rl, int ru, int cl, int cu) {
-		final int nRow = m.getNumColumns();
-		final int nVal = pre.getNumColumns();
-		final double[] preAV = pre.getDenseBlockValues();
-		final double[] mV = m.getDenseBlockValues();
-		final int blockSize = 4000;
-		for(int block = cl; block < cu; block += blockSize) {
-			final int blockEnd = Math.min(block + blockSize, nRow);
-			for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) {
-				final int offLeft = rowLeft * nRow;
-				for(int rc = block; rc < blockEnd; rc++)
-					preAV[_data.get(rc) ? offOut + 1 : offOut] += mV[offLeft + rc];
-			}
-		}
+	protected void preAggregateDenseToRow(double[] mV, int off, double[] preAV, int cl, int cu) {
+		off += cl;
+		for(int rc = cl; rc < cu; rc++, off++)
+			preAV[_data.get(rc) ? 1 : 0] += mV[off];
+	}
+
+	@Override
+	public void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu,
+		AOffset indexes) {
+		throw new NotImplementedException();
+	}
+
+	@Override
+	public void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes) {
+		throw new NotImplementedException();
 	}
 
 	@Override
 	public int getUpperBoundValue() {
 		return 1;
 	}
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
index 5bd1e645b47..537c45836f6 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
@@ -25,6 +25,8 @@
 import java.util.Arrays;
 
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
+import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -98,9 +100,9 @@ public static MapToByte readFields(DataInput in) throws IOException {
 		return new MapToByte(unique, data);
 	}
 
-	public byte[] getBytes() {
-		return _data;
-	}
 
 	@Override
 	public void replace(int v, int r) {
@@ -125,24 +127,27 @@ public void copy(AMapToData d) {
 	}
 
 	@Override
-	public void preAggregateDense(MatrixBlock m, MatrixBlock pre, int rl, int ru, int cl, int cu) {
-		final int nRow = m.getNumColumns();
-		final int nVal = pre.getNumColumns();
-		final double[] preAV = pre.getDenseBlockValues();
-		final double[] mV = m.getDenseBlockValues();
-		final int blockSize = 4000;
-		for(int block =
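Editor's note: for MapToBit the single-row kernel collapses to two accumulators, one per bit value. A standalone sketch mirroring the loop above, using java.util.BitSet as the backing structure like MapToBit does:

import java.util.BitSet;

public class BitPreAggregateSketch {
	static double[] preAggregateRow(double[] mV, BitSet data, int cl, int cu) {
		final double[] preAV = new double[2];
		for(int rc = cl; rc < cu; rc++)
			preAV[data.get(rc) ? 1 : 0] += mV[rc]; // slot picked by the bit
		return preAV;
	}

	public static void main(String[] args) {
		BitSet bs = new BitSet();
		bs.set(1);
		bs.set(3);
		double[] row = {1, 2, 3, 4};
		// slot 0 <- 1 + 3, slot 1 <- 2 + 4
		System.out.println(java.util.Arrays.toString(preAggregateRow(row, bs, 0, 4)));
	}
}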
cl; block < cu; block += blockSize) { - final int blockEnd = Math.min(block + blockSize, nRow); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) { - final int offLeft = rowLeft * nRow; - for(int rc = block; rc < blockEnd; rc++) { - final int idx = _data[rc] & 0xFF; - preAV[offOut + idx] += mV[offLeft + rc]; - } - } + protected void preAggregateDenseToRow(double[] mV, int off, double[] preAV, int cl, int cu) { + if(getUnique() < 127) { + for(int rc = cl; rc < cu; rc++) + preAV[_data[rc]] += mV[off + rc]; + } + else { + for(int rc = cl; rc < cu; rc++) + preAV[_data[rc] & 0xFF] += mV[off + rc]; } } + @Override + public final void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, AOffset indexes) { + indexes.preAggregateDenseMap(m, preAV, rl, ru, cl, cu, getUnique(), _data); + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes){ + indexes.preAggregateSparseMap(sb, preAV, rl, ru, getUnique(), _data); + } + @Override public int getUpperBoundValue() { return 255; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index d1fc0125a2a..249bc6ba50c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -25,6 +25,8 @@ import java.util.Arrays; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.utils.MemoryEstimates; @@ -107,29 +109,50 @@ public static MapToChar readFields(DataInput in) throws IOException { return new MapToChar(unique, data); } - public char[] getChars() { + protected char[] getChars() { return _data; } + private void preAggregateDenseToRowBy8(double[] mV, double[] preAV, int cl, int cu, int off) { + int h = (cu - cl) % 8; + off += cl; + for(int rc = cl; rc < cl + h; rc++, off++) + preAV[_data[rc]] += mV[off]; + for(int rc = cl + h; rc < cu; rc += 8, off += 8) { + int id1 = _data[rc], id2 = _data[rc + 1], id3 = _data[rc + 2], id4 = _data[rc + 3], id5 = _data[rc + 4], + id6 = _data[rc + 5], id7 = _data[rc + 6], id8 = _data[rc + 7]; + preAV[id1] += mV[off]; + preAV[id2] += mV[off + 1]; + preAV[id3] += mV[off + 2]; + preAV[id4] += mV[off + 3]; + preAV[id5] += mV[off + 4]; + preAV[id6] += mV[off + 5]; + preAV[id7] += mV[off + 6]; + preAV[id8] += mV[off + 7]; + } + } + @Override - public void preAggregateDense(MatrixBlock m, MatrixBlock pre, int rl, int ru, int cl, int cu) { - final int nRow = m.getNumColumns(); - final int nVal = pre.getNumColumns(); - final double[] preAV = pre.getDenseBlockValues(); - final double[] mV = m.getDenseBlockValues(); - final int blockSize = 4000; - for(int block = cl; block < cu; block += blockSize) { - final int blockEnd = Math.min(block + blockSize, nRow); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) { - final int offLeft = rowLeft * nRow; - for(int rc = block; rc < blockEnd; rc++) { - final int idx = _data[rc]; - preAV[offOut + idx] += mV[offLeft + rc]; - } - } + protected void preAggregateDenseToRow(double[] mV, int off, double[] preAV, int cl, int cu) { + if(cu - cl > 1000) + preAggregateDenseToRowBy8(mV, preAV, cl, cu, off); + else { 
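Editor's note: preAggregateDenseToRowBy8 peels (cu - cl) % 8 iterations up front so the main loop runs a fixed eight cells per trip; the explicit unrolling is presumably there to keep the accumulation pipeline busy on long rows (an assumption, the patch does not state the motivation). A compact sketch of the remainder-first shape, with the eight explicit statements folded into an inner loop for brevity:

public class UnrolledAccumulateSketch {
	static void accumulate(double[] preAV, char[] map, double[] mV, int cl, int cu) {
		final int h = (cu - cl) % 8;
		for(int rc = cl; rc < cl + h; rc++) // peeled remainder first
			preAV[map[rc]] += mV[rc];
		for(int rc = cl + h; rc < cu; rc += 8) // body handles 8 cells per trip
			for(int i = 0; i < 8; i++) // stands in for the eight unrolled lines
				preAV[map[rc + i]] += mV[rc + i];
	}

	public static void main(String[] args) {
		double[] preAV = new double[2];
		char[] map = new char[10]; // all zeros -> everything lands in slot 0
		double[] mV = new double[10];
		java.util.Arrays.fill(mV, 1.0);
		accumulate(preAV, map, mV, 0, 10);
		System.out.println(preAV[0]); // 10.0
	}
}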
+ off += cl; + for(int rc = cl; rc < cu; rc++, off++) + preAV[_data[rc]] += mV[off]; } } + @Override + public void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, AOffset indexes) { + indexes.preAggregateDenseMap(m, preAV, rl, ru, cl, cu, getUnique(), _data); + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes) { + indexes.preAggregateSparseMap(sb, preAV, rl, ru, getUnique(), _data); + } + @Override public int getUpperBoundValue() { return Character.MAX_VALUE; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java index 8a706880e96..de8d95f6a3d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java @@ -79,7 +79,7 @@ public static AMapToData resize(AMapToData d, int numTuples) { AMapToData ret; if(d instanceof MapToBit) return d; - else if(numTuples <= 1) + else if(numTuples <= 2) ret = new MapToBit(numTuples, size); else if(d instanceof MapToByte) return d; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index b991ccb7e0f..6a518573a54 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -24,7 +24,10 @@ import java.io.IOException; import java.util.Arrays; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.utils.MemoryEstimates; @@ -106,22 +109,20 @@ public static MapToInt readFields(DataInput in) throws IOException { } @Override - public void preAggregateDense(MatrixBlock m, MatrixBlock pre, int rl, int ru, int cl, int cu) { - final int nRow = m.getNumColumns(); - final int nVal = pre.getNumColumns(); - final double[] preAV = pre.getDenseBlockValues(); - final double[] mV = m.getDenseBlockValues(); - final int blockSize = 4000; - for(int block = cl; block < cu; block += blockSize) { - final int blockEnd = Math.min(block + blockSize, nRow); - for(int rowLeft = rl, offOut = 0; rowLeft < ru; rowLeft++, offOut += nVal) { - final int offLeft = rowLeft * nRow; - for(int rc = block; rc < blockEnd; rc++) { - final int idx = _data[rc]; - preAV[offOut + idx] += mV[offLeft + rc]; - } - } - } + protected void preAggregateDenseToRow(double[] mV, int off, double[] preAV, int cl, int cu) { + off += cl; + for(int rc = cl; rc < cu; rc++, off++) + preAV[_data[rc]] += mV[off]; + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, AOffset indexes) { + throw new NotImplementedException(); + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes) { + throw new NotImplementedException(); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AIterator.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AIterator.java index 17a502629d5..1c7e81e2057 100644 --- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AIterator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AIterator.java
@@ -49,13 +49,6 @@ protected AIterator(int index, int dataIndex, int offset) {
 	 */
 	public abstract void next();
 
-	/**
-	 * Get a boolean specifying if the iterator is done
-	 *
-	 * @return A boolean that is true if there are more values contained in the Iterator.
-	 */
-	public abstract boolean hasNext();
-
 	/**
 	 * Get the current index value, note this correspond to a row index in the original matrix.
 	 *
@@ -66,25 +59,38 @@ public int value() {
 	}
 
 	/**
-	 * Get the current index value and increment the pointers
+	 * Find out if the current offset does not exceed the given upper bound.
 	 *
-	 * @return The current value pointed at.
+	 * @param ub The upper bound that the offset must stay below
+	 * @return true if the current offset is strictly below ub.
 	 */
-	public int valueAndIncrement() {
-		int x = offset;
-		next();
-		return x;
+	public boolean isNotOver(int ub) {
+		return offset < ub;
 	}
 
 	/**
 	 * Get the current data index associated with the index returned from value.
 	 *
-	 * @return The data Index.
+	 * This index points to a position in the mapToData object, which in turn can be used to look up the dictionary
+	 * entry in ADictionary.
+	 *
+	 * @return The Data Index.
 	 */
 	public int getDataIndex() {
 		return dataIndex;
 	}
 
+	/**
+	 * Get the current offset index, which points into the underlying offsets list.
+	 *
+	 * This is available for debugging purposes and is not intended for use by calling classes.
+	 *
+	 * @return The Offsets Index.
+	 */
+	public int getOffsetsIndex() {
+		return index;
+	}
+
 	/**
 	 * Get the current data index and increment the pointers using the next operator.
 	 *
@@ -99,17 +105,23 @@ public int getDataIndexAndIncrement() {
 	/**
 	 * Skip values until index is achieved.
 	 *
-	 * @param index The index to skip to.
-	 * @return the index that follows or are equal to the skip to index.
+	 * @param idx The index to skip to.
+	 * @return The first offset that is greater than or equal to idx.
 	 */
-	public int skipTo(int index) {
-		while(hasNext() && offset < index)
-			next();
-		return offset;
-	}
+	public abstract int skipTo(int idx);
 
 	/**
 	 * Copy the iterator with the current values.
 	 */
 	public abstract AIterator clone();
+
+	/**
+	 * Unsafe version of equals; it should only compare iterators stemming from the same Offset object.
+	 *
+	 * @param o The iterator to compare against this one
+	 * @return true if both iterators point to the same position.
+	 */
+	public boolean equals(AIterator o) {
+		return o.index == this.index;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
index 27816009a25..2f51e7f7442 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
@@ -21,12 +21,13 @@
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.Serializable;
-import java.lang.ref.SoftReference;
-import java.util.HashMap;
-import java.util.Map;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
 /**
  * Offset list encoder interface.
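Editor's note: the revised AIterator contract drops hasNext(): next() always advances, skipTo(idx) stops at the first offset greater than or equal to idx, and callers terminate against getOffsetToLast() instead. A toy iterator over a plain int[] illustrating the calling pattern (not the real class hierarchy):

public class ToyOffsetIterator {
	private final int[] offsets; // materialized row indexes
	private int index = 0;

	ToyOffsetIterator(int[] offsets) {
		this.offsets = offsets;
	}

	int value() {
		return offsets[index];
	}

	void next() {
		index++;
	}

	int skipTo(int idx) {
		while(offsets[index] < idx && index < offsets.length - 1)
			next();
		return offsets[index];
	}

	public static void main(String[] args) {
		ToyOffsetIterator it = new ToyOffsetIterator(new int[] {3, 10, 300});
		System.out.println(it.skipTo(5)); // 10: first offset >= 5
		final int last = 300; // what getOffsetToLast() would return
		while(it.value() < last) // loop shape used instead of hasNext()
			it.next();
		System.out.println(it.value()); // 300
	}
}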
@@ -39,9 +40,14 @@
  */
 public abstract class AOffset implements Serializable {
 
-	private static final long serialVersionUID = -4143271285905723425L;
 	protected static final Log LOG = LogFactory.getLog(AOffset.class.getName());
-	protected SoftReference<Map<Integer, AIterator>> skipIterators;
+
+	private ThreadLocal<OffsetCache> cacheRow = new ThreadLocal<OffsetCache>() {
+		@Override
+		protected OffsetCache initialValue() {
+			return null;
+		}
+	};
 
 	/**
 	 * Get an iterator of the offsets.
@@ -57,16 +63,23 @@ public abstract class AOffset implements Serializable {
 	 * @return AIterator that iterate through index and dictionary offset values.
 	 */
 	public AIterator getIterator(int row) {
-		if(skipIterators != null) {
-			Map<Integer, AIterator> sk = skipIterators.get();
-			AIterator it = sk.getOrDefault(row, null);
-			if(it != null)
-				return it.clone();
-		}
-		AIterator it = getIterator();
+		if(row <= getOffsetToFirst())
+			return getIterator();
+		else if(row >= getOffsetToLast())
+			return null;
+
+		// try the cache first.
+		OffsetCache c = cacheRow.get();
+		if(c != null && c.row == row)
+			return c.it.clone();
+
+		// Use the cached iterator if it is closer to the queried row.
+		AIterator it = c != null && c.row < row ? c.it.clone() : getIterator();
 		it.skipTo(row);
+		// cache this new iterator.
 		cacheIterator(it.clone(), row);
 		return it;
 	}
 
 	/**
@@ -76,14 +89,18 @@ public AIterator getIterator(int row) {
 	 * @param row The row index to cache the iterator as.
 	 */
 	public void cacheIterator(AIterator it, int row) {
-		if(skipIterators != null) {
-			Map<Integer, AIterator> sk = skipIterators.get();
-			sk.put(row, it);
+		if(it == null)
+			return;
+		OffsetCache c = cacheRow.get();
+		if(c == null) {
+			c = new OffsetCache();
+			c.it = it;
+			c.row = row;
+			cacheRow.set(c);
 		}
 		else {
-			Map<Integer, AIterator> nsk = new HashMap<>();
-			nsk.put(row, it.clone());
-			skipIterators = new SoftReference<>(nsk);
+			c.it = it;
+			c.row = row;
 		}
 	}
 
@@ -98,6 +115,20 @@ public void cacheIterator(AIterator it, int row) {
 	 */
 	public abstract void write(DataOutput out) throws IOException;
 
+	/**
+	 * Get the offset to the first index
+	 *
+	 * @return The first index offset
+	 */
+	public abstract int getOffsetToFirst();
+
+	/**
+	 * Get the offset to the last value
+	 *
+	 * @return The last value's offset
+	 */
+	public abstract int getOffsetToLast();
+
 	/**
 	 * Get the in memory size of the Offset object
 	 *
@@ -119,17 +150,200 @@ public void cacheIterator(AIterator it, int row) {
 	 */
 	public abstract int getSize();
 
+	/**
+	 * Get the length of the underlying offsets list.
+	 *
+	 * @return The number of offsets.
+	 */
+	public abstract int getOffsetsLength();
+
+	public final void preAggregateDenseMap(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, int nVal,
+		char[] data) {
+		// iterator skipped ahead to the start of the column range.
+		final AIterator it = getIterator(cl);
+		if(it == null)
+			return;
+		else if(it.offset > cu)
+			cacheIterator(it, cu); // cache this iterator.
+		else if(rl == ru - 1) {
+			final DenseBlock db = m.getDenseBlock();
+			final double[] mV = db.values(rl);
+			final int off = db.pos(rl);
+			preAggregateDenseMapRow(mV, off, preAV, cu, nVal, data, it);
+		}
+		else {
+			final DenseBlock db = m.getDenseBlock();
+			preAggregateDenseMapRows(db, preAV, rl, ru, cl, cu, nVal, data);
+		}
+	}
+
+	public final void preAggregateDenseMap(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, int nVal,
+		byte[] data) {
+		// iterator skipped ahead to the start of the column range.
+		final AIterator it = getIterator(cl);
+		if(it == null)
+			return;
+		else if(it.offset > cu)
+			cacheIterator(it, cu); // cache this iterator.
+ else if(rl == ru - 1) { + final DenseBlock db = m.getDenseBlock(); + final double[] mV = db.values(rl); + final int off = db.pos(rl); + preAggregateDenseMapRow(mV, off, preAV, cu, nVal, data, it); + } + else { + final DenseBlock db = m.getDenseBlock(); + preAggregateDenseMapRows(db, preAV, rl, ru, cl, cu, nVal, data); + } + } + + protected abstract void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, byte[] data, + AIterator it); + + protected abstract void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, char[] data, + AIterator it); + + protected void preAggregateDenseMapRows(DenseBlock db, double[] preAV, int rl, int ru, int cl, int cu, int nVal, + char[] data) { + + LOG.warn("Inefficient implementation of Preaggregate DenseMap multi row."); + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + } + + protected void preAggregateDenseMapRows(DenseBlock db, double[] preAV, int rl, int ru, int cl, int cu, int nVal, + byte[] data) { + LOG.warn("Inefficient implementation of Preaggregate DenseMap multi row."); + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + } + + public final void preAggregateSparseMap(SparseBlock sb, double[] preAV, int rl, int ru, int nVal, char[] data) { + final AIterator it = getIterator(); + if(rl == ru - 1) + preAggregateSparseMapRow(sb, preAV, rl, nVal, data, it); + else + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + } + + public final void preAggregateSparseMap(SparseBlock sb, double[] preAV, int rl, int ru, int nVal, byte[] data) { + final AIterator it = getIterator(); + if(rl == ru - 1) + preAggregateSparseMapRow(sb, preAV, rl, nVal, data, it); + else + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + } + + public final void preAggregateSparseMap(SparseBlock sb, double[] preAV, int rl, int ru, int cl, int cu, int nVal, + char[] data) { + // multi row iterator. + final AIterator it = getIterator(cl); + if(it == null) + return; + else if(it.offset > cu) + cacheIterator(it, cu); // cache this iterator. + else if(rl == ru - 1) + preAggregateSparseMapRow(sb, preAV, rl, nVal, data, it); + else + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + + } + + public final void preAggregateSparseMap(SparseBlock sb, double[] preAV, int rl, int ru, int cl, int cu, int nVal, + byte[] data) { + // multi row iterator. + final AIterator it = getIterator(cl); + if(it == null) + return; + else if(it.offset > cu) + cacheIterator(it, cu); // cache this iterator. 
+ else if(rl == ru - 1) + preAggregateSparseMapRow(sb, preAV, rl, nVal, data, it); + else + throw new NotImplementedException("MultiRow Preaggregation not supported yet"); + + } + + protected void preAggregateSparseMapRow(SparseBlock sb, double[] preAV, int r, int nVal, byte[] data, AIterator it) { + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] avals = sb.values(r); + + final int maxId = data.length - 1; + + int j = apos; + while(true) { + final int idx = aix[j]; + if(idx == it.offset) { + preAV[data[it.dataIndex] & 0xFF] += avals[j++]; + if(j >= alen || it.dataIndex >= maxId) + break; + it.next(); + } + else if(idx < it.offset) { + j++; + if(j >= alen) + break; + } + else { + if(it.dataIndex >= maxId) + break; + it.next(); + } + } + } + + protected void preAggregateSparseMapRow(SparseBlock sb, double[] preAV, int r, int nVal, char[] data, AIterator it) { + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] avals = sb.values(r); + + final int maxId = data.length - 1; + + int j = apos; + while(true) { + final int idx = aix[j]; + if(idx == it.offset) { + preAV[data[it.dataIndex]] += avals[j++]; + if(j >= alen || it.dataIndex >= maxId) + break; + it.next(); + } + else if(idx < it.offset) { + j++; + if(j >= alen) + break; + } + else { + if(it.dataIndex >= maxId) + break; + it.next(); + } + } + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); - AIterator i = getIterator(); sb.append(this.getClass().getSimpleName()); - sb.append(" ["); - sb.append(i.valueAndIncrement()); - - while(i.hasNext()) - sb.append(", " + i.valueAndIncrement()); + final AIterator it = getIterator(); + final int last = getOffsetToLast(); + sb.append("["); + while(it.offset < last) { + sb.append(it.offset); + sb.append(", "); + it.next(); + } + sb.append(it.offset); sb.append("]"); return sb.toString(); } + + protected static class OffsetCache { + protected AIterator it = null; + protected int row = -1; + + protected OffsetCache() { + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java index 29133cbd758..ebb29df1900 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java @@ -21,18 +21,18 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.Arrays; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.utils.MemoryEstimates; public class OffsetByte extends AOffset { private static final long serialVersionUID = -4716104973912491790L; + private static final int maxV = 255; - private final static int maxV = 255; private final byte[] offsets; private final int offsetToFirst; + private final int offsetToLast; + private final boolean noOverHalf; public OffsetByte(int[] indexes) { this(indexes, 0, indexes.length); @@ -41,21 +41,22 @@ public OffsetByte(int[] indexes) { public OffsetByte(int[] indexes, int apos, int alen) { int endSize = 0; offsetToFirst = indexes[apos]; + offsetToLast = indexes[alen - 1]; int ov = offsetToFirst; + // find the size of the array for(int i = apos + 1; i < alen; i++) { final int nv = indexes[i]; - endSize += 1 + (nv - ov) / maxV; + endSize += 1 + (nv - ov - 1) / maxV; ov = nv; } offsets = new 
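Editor's note: both preAggregateSparseMapRow variants are a sorted merge. The sparse row's column indexes and the group's offsets are walked in lock-step, and a value is accumulated only where they coincide. The same logic over plain arrays:

public class SparseOffsetMergeSketch {
	static double[] merge(int[] aix, double[] avals, int[] offsets, int[] map, int nVal) {
		final double[] preAV = new double[nVal];
		int j = 0; // position in the sparse row
		int k = 0; // position in the offsets
		while(j < aix.length && k < offsets.length) {
			if(aix[j] == offsets[k])
				preAV[map[k++]] += avals[j++]; // indexes coincide: accumulate
			else if(aix[j] < offsets[k])
				j++; // sparse entry not covered by the group
			else
				k++; // offset has no value in this sparse row
		}
		return preAV;
	}

	public static void main(String[] args) {
		// sparse row: cols {2, 5, 9}; group offsets: {5, 9}; map -> dict entries {0, 1}
		double[] r = merge(new int[] {2, 5, 9}, new double[] {1, 2, 3}, new int[] {5, 9}, new int[] {0, 1}, 2);
		System.out.println(java.util.Arrays.toString(r)); // [2.0, 3.0]
	}
}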
byte[endSize]; ov = offsetToFirst; int p = 0; + // populate the array for(int i = apos + 1; i < alen; i++) { final int nv = indexes[i]; final int offsetSize = nv - ov; - if(offsetSize == 0) - throw new DMLCompressionException("Invalid difference between cells :\n" + Arrays.toString(indexes)); final int div = offsetSize / maxV; final int mod = offsetSize % maxV; if(mod == 0) { @@ -69,11 +70,30 @@ public OffsetByte(int[] indexes, int apos, int alen) { ov = nv; } + boolean noOverHalf = true; + for(byte b : offsets) + if(b < 0) { + noOverHalf = false; + break; + } + this.noOverHalf = noOverHalf; } - private OffsetByte(byte[] offsets, int offsetToFirst) { + protected OffsetByte(byte[] offsets, int offsetToFirst, int offsetToLast) { this.offsets = offsets; this.offsetToFirst = offsetToFirst; + this.offsetToLast = offsetToLast; + this.noOverHalf = getNoOverHalf(); + } + + private boolean getNoOverHalf() { + boolean noOverHalf = true; + for(byte b : offsets) + if(b < 0) { + noOverHalf = false; + break; + } + return noOverHalf; } @Override @@ -92,7 +112,9 @@ public void write(DataOutput out) throws IOException { @Override public long getInMemorySize() { - return getInMemorySize(offsets.length); + long size = 16 + 4 + 4 + 8; // object header plus ints plus reference + size += MemoryEstimates.byteArrayCost(offsets.length); + return size; } @Override @@ -103,29 +125,288 @@ public long getExactSizeOnDisk() { @Override public int getSize() { int size = 1; - for(byte b : offsets) { + for(byte b : offsets) if(b != 0) size++; - } + return size; } - public static long getInMemorySize(int length) { - long size = 16 + 4 + 8; // object header plus int plus reference - size += MemoryEstimates.byteArrayCost(length); + @Override + public int getOffsetToFirst() { + return offsetToFirst; + } + + @Override + public int getOffsetToLast() { + return offsetToLast; + } + + @Override + public int getOffsetsLength() { + return offsets.length; + } + + public static long estimateInMemorySize(int nOffs, int nRows) { + long size = 16 + 4 + 4 + 8; // object header plus int plus reference + size += MemoryEstimates.byteArrayCost(Math.max(nOffs, nRows / maxV)); return size; } public static OffsetByte readFields(DataInput in) throws IOException { - int offsetToFirst = in.readInt(); - int offsetsLength = in.readInt(); - byte[] offsets = new byte[offsetsLength]; + final int offsetToFirst = in.readInt(); + final int offsetsLength = in.readInt(); + + final byte[] offsets = new byte[offsetsLength]; + int offsetToLast = offsetToFirst; for(int i = 0; i < offsetsLength; i++) { offsets[i] = in.readByte(); + offsetToLast += offsets[i] & 0xFF; + } + return new OffsetByte(offsets, offsetToFirst, offsetToLast); + } + + @Override + protected final void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, byte[] data, + AIterator it) { + IterateByteOffset itb = (IterateByteOffset) it; + final boolean noZero = offsets.length == data.length - 1; + if(cu < offsetToLast + 1) { + if(noOverHalf && noZero && nVal < 127) + preAggregateDenseByteMapRowBelowEndAndNoZeroNoOverHalfAlsoData(mV, off, preAV, cu, data, itb); + else if(noOverHalf && noZero) + preAggregateDenseByteMapRowBelowEndAndNoZeroNoOverHalf(mV, off, preAV, cu, data, itb); + else if(noZero) + preAggregateDenseByteMapRowBelowEndAndNoZero(mV, off, preAV, cu, data, itb); + else + preAggregateDenseByteMapRowBelowEnd(mV, off, preAV, cu, data, itb); + cacheIterator(itb, cu); + } + else if(noZero) + preAggregateDenseByteMapRowNoZero(mV, off, preAV, data, itb); + else + 
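Editor's note: the constructor's size and population loops implement a byte delta encoding. Each entry stores the gap to the previous offset, and gaps above 255 are split into zero filler bytes (each meaning "advance 255, emit nothing") plus a remainder. A standalone sketch with a worked example:

public class ByteOffsetEncodeSketch {
	static byte[] encode(int[] indexes) {
		final int maxV = 255;
		int size = 0;
		for(int i = 1; i < indexes.length; i++) // same size formula as above
			size += 1 + (indexes[i] - indexes[i - 1] - 1) / maxV;
		final byte[] offsets = new byte[size];
		int p = 0;
		for(int i = 1; i < indexes.length; i++) {
			final int gap = indexes[i] - indexes[i - 1];
			final int div = gap / maxV;
			final int mod = gap % maxV;
			if(mod == 0) { // exact multiple: one fewer filler, then a full 255 step
				for(int j = 0; j < div - 1; j++)
					offsets[p++] = 0;
				offsets[p++] = (byte) maxV;
			}
			else {
				for(int j = 0; j < div; j++)
					offsets[p++] = 0;
				offsets[p++] = (byte) mod;
			}
		}
		return offsets;
	}

	public static void main(String[] args) {
		// offsets {3, 10, 300}: gaps 7 and 290 -> bytes [7, 0, 35]
		System.out.println(java.util.Arrays.toString(encode(new int[] {3, 10, 300})));
	}
}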
preAggregateDenseByteMapRow(mV, off, preAV, data, itb); + + } + + private final void preAggregateDenseByteMapRow(double[] mV, int off, double[] preAV, byte[] data, + IterateByteOffset it) { + final int maxId = data.length - 1; + + int offset = it.offset + off; + int index = it.index; + int dataIndex = it.dataIndex; + + preAV[data[dataIndex] & 0xFF] += mV[offset]; + while(dataIndex < maxId) { + byte v = offsets[index]; + while(v == 0) { + offset += maxV; + index++; + v = offsets[index]; + } + offset += v & 0xFF; + index++; + dataIndex++; + preAV[data[dataIndex] & 0xFF] += mV[offset]; + } + } + + private final void preAggregateDenseByteMapRowNoZero(double[] mV, int off, double[] preAV, byte[] data, + IterateByteOffset it) { + + int offset = it.offset + off; + int index = it.index; + + while(index < offsets.length) { + preAV[data[index] & 0xFF] += mV[offset]; + offset += offsets[index++] & 0xFF; + } + // process straggler index. + preAV[data[index] & 0xFF] += mV[offset]; + } + + private void preAggregateDenseByteMapRowBelowEnd(double[] mV, int off, double[] preAV, int cu, byte[] data, + IterateByteOffset it) { + + cu += off; + it.offset += off; + while(it.offset < cu) { + preAV[data[it.dataIndex] & 0xFF] += mV[it.offset]; + byte v = offsets[it.index]; + while(v == 0) { + it.offset += maxV; + it.index++; + v = offsets[it.index]; + } + it.offset += v & 0xFF; + it.index++; + it.dataIndex++; + } + it.offset -= off; + } + + private void preAggregateDenseByteMapRowBelowEndAndNoZero(double[] mV, int off, double[] preAV, int cu, byte[] data, + IterateByteOffset it) { + + int offset = it.offset + off; + int index = it.index; + + cu += off; + + while(offset < cu) { + preAV[data[index] & 0xFF] += mV[offset]; + offset += offsets[index++] & 0xFF; + } + + it.offset = offset - off; + it.dataIndex = index; + it.index = index; + } + + private final void preAggregateDenseByteMapRowBelowEndAndNoZeroNoOverHalf(double[] mV, int off, double[] preAV, + int cu, byte[] data, IterateByteOffset it) { + int offset = it.offset + off; + int index = it.index; + + cu += off; + + while(offset < cu) { + preAV[data[index] & 0xFF] += mV[offset]; + offset += offsets[index++]; + } + + it.offset = offset - off; + it.dataIndex = index; + it.index = index; + } + + private final void preAggregateDenseByteMapRowBelowEndAndNoZeroNoOverHalfAlsoData(double[] mV, int off, + double[] preAV, int cu, byte[] data, IterateByteOffset it) { + int offset = it.offset + off; + int index = it.index; + + cu += off; + + while(offset < cu) { + preAV[data[index]] += mV[offset]; + offset += offsets[index++]; + } + + it.offset = offset - off; + it.dataIndex = index; + it.index = index; + } + + @Override + protected final void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, char[] data, + AIterator it) { + IterateByteOffset itb = (IterateByteOffset) it; + final boolean noZero = offsets.length == data.length - 1; + if(cu < offsetToLast + 1) { + if(noOverHalf && noZero) + preAggregateDenseCharMapRowBelowEndAndNoZeroNoOverHalf(mV, off, preAV, cu, data, itb); + else if(noZero) + preAggregateDenseCharMapRowBelowEndAndNoZero(mV, off, preAV, cu, data, itb); + else + preAggregateDenseCharMapRowBelowEnd(mV, off, preAV, cu, data, itb); + cacheIterator(itb, cu); + } + else if(noZero) + preAggregateDenseCharMapRowNoZero(mV, off, preAV, data, itb); + else + preAggregateDenseCharMapRow(mV, off, preAV, data, itb); + } + + private void preAggregateDenseCharMapRow(double[] mV, int off, double[] preAV, char[] data, IterateByteOffset it) { + 
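Editor's note: the kernel selection above is driven by two precomputed flags; as far as the code shows, noZero (offsets.length == data.length - 1) means no filler bytes exist so a single counter can drive both the offsets and the data array, and noOverHalf means every delta fits in 0..127 so the & 0xFF widening mask can be dropped. The mask matters because Java bytes are signed:

public class ByteMaskSketch {
	public static void main(String[] args) {
		byte delta = (byte) 200; // a gap above 127 is stored with the sign bit set
		System.out.println(delta); // -56: using it directly would walk backwards
		System.out.println(delta & 0xFF); // 200: the mask restores the unsigned gap
	}
}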
final int maxId = data.length - 1; + int offset = it.offset + off; + int index = it.index; + int dataIndex = it.dataIndex; + + preAV[data[dataIndex]] += mV[offset]; + while(dataIndex < maxId) { + byte v = offsets[index]; + while(v == 0) { + offset += maxV; + index++; + v = offsets[index]; + } + offset += v & 0xff; + index++; + dataIndex++; + preAV[data[dataIndex]] += mV[offset]; } - return new OffsetByte(offsets, offsetToFirst); } + private void preAggregateDenseCharMapRowNoZero(double[] mV, int off, double[] preAV, char[] data, + IterateByteOffset it) { + + int offset = it.offset + off; + int index = it.index; + while(index < offsets.length) { + preAV[data[index]] += mV[offset]; + offset += offsets[index++] & 0xFF; + } + preAV[data[index]] += mV[offset]; + } + + private void preAggregateDenseCharMapRowBelowEnd(double[] mV, int off, double[] preAV, int cu, char[] data, + IterateByteOffset it) { + + cu += off; + it.offset += off; + while(it.offset < cu) { + preAV[data[it.dataIndex]] += mV[it.offset]; + byte v = offsets[it.index]; + while(v == 0) { + it.offset += maxV; + it.index++; + v = offsets[it.index]; + } + it.offset += v & 0xFF; + it.index++; + it.dataIndex++; + } + it.offset -= off; + } + + private void preAggregateDenseCharMapRowBelowEndAndNoZero(double[] mV, int off, double[] preAV, int cu, char[] data, + IterateByteOffset it) { + int offset = it.offset + off; + int index = it.index; + + cu += off; + + while(offset < cu) { + preAV[data[index]] += mV[offset]; + offset += offsets[index++] & 0xFF; + } + + it.offset = offset - off; + it.dataIndex = index; + it.index = index; + } + + private final void preAggregateDenseCharMapRowBelowEndAndNoZeroNoOverHalf(double[] mV, int off, double[] preAV, + int cu, char[] data, IterateByteOffset it) { + int offset = it.offset + off; + int index = it.index; + + cu += off; + + while(offset < cu) { + preAV[data[index]] += mV[offset]; + offset += offsets[index++]; + } + + it.offset = offset - off; + it.dataIndex = index; + it.index = index; + } + + + private class IterateByteOffset extends AIterator { private IterateByteOffset() { @@ -138,26 +419,22 @@ private IterateByteOffset(int index, int dataIndex, int offset) { @Override public void next() { - if(index >= offsets.length) { - index++; - dataIndex++; - return; - } - - final byte v = offsets[index++]; - if(v == 0) { + byte v = offsets[index]; + while(v == 0) { offset += maxV; - next(); - } - else { - dataIndex++; - offset += v & 0xFF; + index++; + v = offsets[index]; } + offset += v & 0xFF; + index++; + dataIndex++; } @Override - public boolean hasNext() { - return index <= offsets.length; + public int skipTo(int idx) { + while(offset < idx && index < offsets.length) + next(); + return offset; } @Override @@ -165,4 +442,5 @@ public IterateByteOffset clone() { return new IterateByteOffset(index, dataIndex, offset); } } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java index c1c2930c850..dda7ab9e1da 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java @@ -21,19 +21,17 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.Arrays; -import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.utils.MemoryEstimates; public class OffsetChar extends AOffset { private static final 
long serialVersionUID = -1192266421395964882L; - - private final static int maxV = (int) Character.MAX_VALUE; + private static final int maxV = (int) Character.MAX_VALUE; private final char[] offsets; private final int offsetToFirst; + private final int offsetToLast; public OffsetChar(int[] indexes) { this(indexes, 0, indexes.length); @@ -42,21 +40,20 @@ public OffsetChar(int[] indexes) { public OffsetChar(int[] indexes, int apos, int alen) { int endSize = 0; offsetToFirst = indexes[apos]; + offsetToLast = indexes[alen - 1]; int ov = offsetToFirst; - for(int i = apos+1; i < alen; i++) { + for(int i = apos + 1; i < alen; i++) { final int nv = indexes[i]; - endSize += 1 + (nv - ov) / maxV; + endSize += 1 + (nv - ov - 1) / maxV; ov = nv; } offsets = new char[endSize]; ov = offsetToFirst; int p = 0; - for(int i = apos+1; i < alen; i++) { + for(int i = apos + 1; i < alen; i++) { final int nv = indexes[i]; final int offsetSize = (nv - ov); - if(offsetSize == 0) - throw new DMLCompressionException("Invalid difference between cells :\n" + Arrays.toString(indexes)); final int div = offsetSize / maxV; final int mod = offsetSize % maxV; if(mod == 0) { @@ -72,9 +69,10 @@ public OffsetChar(int[] indexes, int apos, int alen) { } } - private OffsetChar(char[] offsets, int offsetToFirst) { + private OffsetChar(char[] offsets, int offsetToFirst, int offsetToLast) { this.offsets = offsets; this.offsetToFirst = offsetToFirst; + this.offsetToLast = offsetToLast; } @Override @@ -93,7 +91,9 @@ public void write(DataOutput out) throws IOException { @Override public long getInMemorySize() { - return getInMemorySize(offsets.length); + long size = 16 + 4 + 8; // object header plus int plus reference + size += MemoryEstimates.charArrayCost(offsets.length); + return size; } @Override @@ -111,22 +111,69 @@ public int getSize() { return size; } + @Override + public int getOffsetToFirst() { + return offsetToFirst; + } + + @Override + public int getOffsetToLast() { + return offsetToLast; + } + + @Override + public int getOffsetsLength() { + return offsets.length; + } + public static OffsetChar readFields(DataInput in) throws IOException { - int offsetToFirst = in.readInt(); - int offsetsLength = in.readInt(); - char[] offsets = new char[offsetsLength]; + final int offsetToFirst = in.readInt(); + final int offsetsLength = in.readInt(); + final char[] offsets = new char[offsetsLength]; + int offsetToLast = offsetToFirst; for(int i = 0; i < offsetsLength; i++) { offsets[i] = in.readChar(); + offsetToLast += offsets[i]; } - return new OffsetChar(offsets, offsetToFirst); + return new OffsetChar(offsets, offsetToFirst, offsetToLast); } - public static long getInMemorySize(int length) { + public static long estimateInMemorySize(int nOffs, int nRows) { long size = 16 + 4 + 8; // object header plus int plus reference - size += MemoryEstimates.charArrayCost(length - 1); + size += MemoryEstimates.charArrayCost(Math.max(nOffs, nRows / maxV)); return size; } + @Override + protected final void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, byte[] data, + AIterator it) { + final int maxId = data.length - 1; + while(it.isNotOver(cu)) { + final int dx = it.getDataIndex(); + preAV[data[dx] & 0xFF] += mV[off + it.value()]; + if(dx < maxId) + it.next(); + else + break; + } + cacheIterator(it, cu); + } + + @Override + protected final void preAggregateDenseMapRow(double[] mV, int off, double[] preAV, int cu, int nVal, char[] data, + AIterator it) { + final int maxId = data.length - 1; + while(it.isNotOver(cu)) 
{ + final int dx = it.getDataIndex(); + preAV[data[dx]] += mV[off + it.value()]; + if(dx < maxId) + it.next(); + else + break; + } + cacheIterator(it, cu); + } + private class IterateCharOffset extends AIterator { private IterateCharOffset() { @@ -139,25 +186,27 @@ private IterateCharOffset(int index, int dataIndex, int offset) { @Override public void next() { - if(index >= offsets.length) { - index++; - dataIndex++; - return; - } - final char v = offsets[index++]; - if(v == 0) { + char v = offsets[index]; + while(v == 0) { offset += maxV; - next(); - } - else { - dataIndex++; - offset += v; + index++; + v = offsets[index]; } + offset += v; + index++; + dataIndex++; } @Override - public boolean hasNext() { - return index <= offsets.length; + public int value() { + return offset; + } + + @Override + public int skipTo(int idx) { + while(offset < idx && index < offsets.length) + next(); + return offset; } @Override @@ -165,5 +214,4 @@ public IterateCharOffset clone() { return new IterateCharOffset(index, dataIndex, offset); } } - } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java index d54be828985..60f8231f531 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java @@ -22,12 +22,11 @@ import java.io.DataInput; import java.io.IOException; -import org.apache.sysds.runtime.compress.DMLCompressionException; - public interface OffsetFactory { // static final Log LOG = LogFactory.getLog(OffsetFactory.class.getName()); + /** The specific underlying types of offsets. */ public enum OFF_TYPE { BYTE, CHAR } @@ -35,11 +34,14 @@ public enum OFF_TYPE { /** * Main factory pattern creator for Offsets. * + * Note this creator is unsafe in the sense that it assumes the input index list contains only sequential, + * strictly incrementing, non-duplicate values. + * * @param indexes List of indexes, that is assumed to be sorted and have no duplicates * @return AOffset object containing offsets to the next value. */ - public static AOffset create(int[] indexes) { - return create(indexes, 0, indexes.length); + public static AOffset createOffset(int[] indexes) { + return createOffset(indexes, 0, indexes.length); } /** @@ -48,18 +50,22 @@ public static AOffset create(int[] indexes) { * This is useful if the input is created from a CSR matrix, since it allows us to not reallocate the indexes[] but * use the shared indexes from the entire CSR representation. * + * Note this creator is unsafe in the sense that it assumes the input indexes in the range from apos to alen + * contain only sequential, strictly incrementing, non-duplicate values. + * * @param indexes The indexes from which to take the offsets. * @param apos The position to start looking from in the indexes. * @param alen The position to end looking at in the indexes. * @return A new Offset.
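+ * + * A hypothetical example (names assumed here, not part of this patch): the shared index array of CSR row r can + * be wrapped via {@code createOffset(sb.indexes(r), sb.pos(r), sb.pos(r) + sb.size(r))} without reallocation.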
*/ - public static AOffset create(int[] indexes, int apos, int alen) { + public static AOffset createOffset(int[] indexes, int apos, int alen) { + final int minValue = indexes[apos]; final int maxValue = indexes[alen - 1]; - if(maxValue < 0) - throw new DMLCompressionException("Invalid sizes given"); + final int range = maxValue - minValue; final int endLength = alen - apos; - final float avgDist = (float) maxValue / endLength; - if(avgDist < 256) + final long byteSize = OffsetByte.estimateInMemorySize(endLength, range); + final long charSize = OffsetChar.estimateInMemorySize(endLength, range); + if(byteSize < charSize) return new OffsetByte(indexes, apos, alen); else return new OffsetChar(indexes, apos, alen); @@ -96,16 +102,14 @@ public static AOffset readIn(DataInput in) throws IOException { * @return The estimated size of an offset given the number of offsets and rows. */ public static long estimateInMemorySize(int size, int nRows) { - if(size < 0 || nRows < 0) - throw new DMLCompressionException("Invalid sizes given: " + size + " " + nRows); - else if(size == 0) + if(size == 0) return 8; // If this is the case, then the compression results in constant col groups else { final int avgDiff = nRows / size; if(avgDiff < 256) - return OffsetByte.getInMemorySize(size - 1); + return OffsetByte.estimateInMemorySize(size - 1, nRows); else - return OffsetChar.getInMemorySize(size - 1); + return OffsetChar.estimateInMemorySize(size - 1, nRows); } } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibAppend.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibAppend.java index 6ca2619a160..68eca8045af 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibAppend.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibAppend.java @@ -70,6 +70,7 @@ public static MatrixBlock append(CompressedMatrixBlock left, CompressedMatrixBlo ret = appendColGroups(ret, left.getColGroups(), right.getColGroups(), left.getNumColumns()); + ret.setOverlapping(left.isOverlapping() || right.isOverlapping()); double compressedSize = ret.getInMemorySize(); double uncompressedSize = MatrixBlock.estimateSizeInMemory(m, n, ret.getSparsity()); @@ -85,24 +86,20 @@ public static MatrixBlock append(CompressedMatrixBlock left, CompressedMatrixBlo } private static MatrixBlock appendRightEmpty(CompressedMatrixBlock left, MatrixBlock right, int m, int n) { - CompressedMatrixBlock ret = new CompressedMatrixBlock(m, n); - List newGroup = new ArrayList<>(1); newGroup.add(ColGroupEmpty.generate(right.getNumColumns())); ret = appendColGroups(ret, left.getColGroups(), newGroup, left.getNumColumns()); - + ret.setOverlapping(left.isOverlapping()); return ret; } private static MatrixBlock appendLeftEmpty(MatrixBlock left, CompressedMatrixBlock right, int m, int n) { - CompressedMatrixBlock ret = new CompressedMatrixBlock(m, n); - List newGroup = new ArrayList<>(1); newGroup.add(ColGroupEmpty.generate(left.getNumColumns())); ret = appendColGroups(ret, newGroup, right.getColGroups(), left.getNumColumns()); - + ret.setOverlapping(right.isOverlapping()); return ret; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java index e4c33330cd9..a045fa2362c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java @@ -158,8 +158,8 @@ private static CompressedMatrixBlock 
setupCompressedReturnMatrixBlock(Compressed return ret; } - private static MatrixBlock rowBinCellOp(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, - BinaryOperator op, boolean left) { + private static MatrixBlock rowBinCellOp(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, + boolean left) { CompressedMatrixBlock cRet = setupCompressedReturnMatrixBlock(m1, ret); if(isValidForOverlappingBinaryCellOperations(m1, op)) overlappingBinaryCellOp(m1, m2, cRet, op, left); @@ -333,32 +333,42 @@ protected static CompressedMatrixBlock binaryMVPlusStack(CompressedMatrixBlock m private static MatrixBlock binaryMVCol(CompressedMatrixBlock m1, MatrixBlock m2, BinaryOperator op, boolean left) { - MatrixBlock ret = new MatrixBlock(m1.getNumRows(), m1.getNumColumns(), false, -1).allocateBlock(); + final int nCols = m1.getNumColumns(); + final int nRows = m1.getNumRows(); + // Pre filter. + final List groups = m1.getColGroups(); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + if(shouldFilter) { + CompressedMatrixBlock mf1 = new CompressedMatrixBlock(m1); + double[] constV = new double[nCols]; + final List filteredGroups = CLALibUtils.filterGroups(groups, constV); + filteredGroups.add(ColGroupFactory.genColGroupConst(constV)); + mf1.allocateColGroupList(filteredGroups); + m1 = mf1; + } + MatrixBlock ret = new MatrixBlock(nRows, nCols, false, -1).allocateBlock(); - final int blkz = CompressionSettings.BITMAP_BLOCK_SZ / m1.getNumColumns() * 5; + final int blkz = CompressionSettings.BITMAP_BLOCK_SZ / nCols * 5; final int k = op.getNumThreads(); long nnz = 0; if(k <= 1) { - for(int i = 0; i * blkz < m1.getNumRows(); i++) { + for(int i = 0; i < nRows; i += blkz) { if(left) - nnz += new BinaryMVColLeftTask(m1, m2, ret, i * blkz, Math.min(m1.getNumRows(), (i + 1) * blkz), op) - .call(); + nnz += new BinaryMVColLeftTask(m1, m2, ret, i, Math.min(nRows, i + blkz), op).call(); else - nnz += new BinaryMVColTask(m1, m2, ret, i * blkz, Math.min(m1.getNumRows(), (i + 1) * blkz), op).call(); + nnz += new BinaryMVColTask(m1, m2, ret, i, Math.min(nRows, i + blkz), op).call(); } } else { ExecutorService pool = CommonThreadPool.get(op.getNumThreads()); ArrayList> tasks = new ArrayList<>(); try { - for(int i = 0; i * blkz < m1.getNumRows(); i++) { + for(int i = 0; i < nRows; i += blkz) { if(left) - tasks.add( - new BinaryMVColLeftTask(m1, m2, ret, i * blkz, Math.min(m1.getNumRows(), (i + 1) * blkz), op)); + tasks.add(new BinaryMVColLeftTask(m1, m2, ret, i, Math.min(nRows, i + blkz), op)); else - tasks.add(new BinaryMVColTask(m1, m2, ret, i * blkz, Math.min(m1.getNumRows(), (i + 1) * blkz), op)); - + tasks.add(new BinaryMVColTask(m1, m2, ret, i, Math.min(nRows, i + blkz), op)); } for(Future f : pool.invokeAll(tasks)) nnz += f.get(); @@ -396,7 +406,7 @@ protected BinaryMVColTask(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock public Integer call() { // unsafe decompress, since we count nonzeros afterwards. for(AColGroup g : _m1.getColGroups()) - g.decompressToBlock(_ret, _rl, _ru); + g.decompressToDenseBlock(_ret.getDenseBlock(), _rl, _ru); if(_m2.isInSparseFormat()) throw new NotImplementedException("Not Implemented sparse Format execution for MM."); @@ -440,7 +450,7 @@ protected BinaryMVColLeftTask(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBl public Integer call() { // unsafe decompress, since we count nonzeros afterwards. 
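// Each group writes only the columns it covers into the shared dense block, so after this loop the // row range [_rl,_ru) is fully materialized before the binary operation is applied.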
for(AColGroup g : _m1.getColGroups()) - g.decompressToBlock(_ret, _rl, _ru); + g.decompressToDenseBlock(_ret.getDenseBlock(), _rl, _ru); if(_m2.isInSparseFormat()) throw new NotImplementedException("Not Implemented sparse Format execution for MM."); diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java index 4a39eac1e89..49fdfe281c9 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java @@ -563,7 +563,7 @@ private MatrixBlock getTmp() { private MatrixBlock decompressToTemp() { MatrixBlock tmp = getTmp(); for(AColGroup g : _m1.getColGroups()) - g.decompressToBlock(tmp, _rl, _ru, -_rl, 0); + g.decompressToDenseBlock(tmp.getDenseBlock(), _rl, _ru, -_rl, 0); tmp.setNonZeros(_rl + _ru); return tmp; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibDecompress.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibDecompress.java index 558ca7b3cd0..a646f8f4564 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibDecompress.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibDecompress.java @@ -20,7 +20,6 @@ package org.apache.sysds.runtime.compress.lib; import java.util.ArrayList; -import java.util.Comparator; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -31,13 +30,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysds.api.DMLScript; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; -import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed; import org.apache.sysds.runtime.controlprogram.parfor.stat.Timing; import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.CommonThreadPool; import org.apache.sysds.utils.DMLCompressionStatistics; @@ -69,7 +68,7 @@ public static void decompressTo(CompressedMatrixBlock cmb, MatrixBlock ret, int else if(outSparse) decompressToSparseBlock(cmb, ret, rowOffset, colOffset); else - decompressToDenseBlock(cmb, ret, rowOffset, colOffset); + decompressToDenseBlock(cmb, ret.getDenseBlock(), rowOffset, colOffset); if(DMLScript.STATISTICS) { final double t = time.stop(); @@ -81,29 +80,37 @@ else if(outSparse) private static void decompressToSparseBlock(CompressedMatrixBlock cmb, MatrixBlock ret, int rowOffset, int colOffset) { - final List groups = new ArrayList<>(cmb.getColGroups()); - final int nRows = cmb.getNumRows(); - for(AColGroup g : groups) - g.decompressToBlock(ret, 0, nRows, rowOffset, colOffset); + final SparseBlock sb = ret.getSparseBlock(); + final List groups = cmb.getColGroups(); + final int nRows = cmb.getNumRows(); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + if(shouldFilter) { + final MatrixBlock tmp = cmb.getUncompressed("Decompression to put into Sparse Block"); + tmp.putInto(ret, rowOffset, colOffset, false); + } + else + for(AColGroup g : groups) + g.decompressToSparseBlock(sb, 0, nRows, rowOffset, colOffset); } - private static void 
decompressToDenseBlock(CompressedMatrixBlock cmb, MatrixBlock ret, int rowOffset, - int colOffset) { - final List groups = new ArrayList<>(cmb.getColGroups()); + private static void decompressToDenseBlock(CompressedMatrixBlock cmb, DenseBlock ret, int rowOffset, int colOffset) { + final List groups = cmb.getColGroups(); // final int nCols = cmb.getNumColumns(); final int nRows = cmb.getNumRows(); - final boolean containsSDC = CLALibUtils.containsSDCOrConst(groups); - double[] constV = containsSDC ? new double[cmb.getNumColumns()] : null; - final List filteredGroups = containsSDC ? CLALibUtils.filterGroups(groups, constV) : groups; - - for(AColGroup g : filteredGroups) - g.decompressToBlock(ret, 0, nRows, rowOffset, colOffset); - - if(constV != null) { + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + if(shouldFilter) { + final double[] constV = new double[cmb.getNumColumns()]; + final List filteredGroups = CLALibUtils.filterGroups(groups, constV); + for(AColGroup g : filteredGroups) + g.decompressToDenseBlock(ret, 0, nRows, rowOffset, colOffset); AColGroup cRet = ColGroupFactory.genColGroupConst(constV); - cRet.decompressToBlock(ret, 0, nRows, rowOffset, colOffset); + cRet.decompressToDenseBlock(ret, 0, nRows, rowOffset, colOffset); + } + else { + for(AColGroup g : groups) + g.decompressToDenseBlock(ret, 0, nRows, rowOffset, colOffset); } } @@ -122,34 +129,49 @@ private static MatrixBlock decompressExecute(CompressedMatrixBlock cmb, int k) { ret.setNonZeros(ret.recomputeNonZeros()); return ret; // if uncompressedColGroup is only colGroup. } - else if(ret == null) { - ret = new MatrixBlock(nRows, nCols, false, -1); - ret.allocateDenseBlock(); - } - final int block = (int) Math.ceil((double) (CompressionSettings.BITMAP_BLOCK_SZ) / nCols); - final int blklen = block > 1000 ? block + 1000 - block % 1000 : Math.max(64, block); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + double[] constV = shouldFilter ? new double[nCols] : null; + final List filteredGroups = shouldFilter ? CLALibUtils.filterGroups(groups, constV) : groups; + + if(ret == null) { // There was no uncompressed group that fit the entire matrix. + final boolean sparse = !shouldFilter && !overlapping && + MatrixBlock.evalSparseFormatInMemory(nRows, nCols, nonZeros); + ret = new MatrixBlock(nRows, nCols, sparse); + if(sparse) + ret.allocateSparseRowsBlock(); + else + ret.allocateDenseBlock(); + } - final boolean containsSDC = CLALibUtils.containsSDCOrConst(groups); - double[] constV = containsSDC ? new double[ret.getNumColumns()] : null; - final List filteredGroups = containsSDC ? CLALibUtils.filterGroups(groups, constV) : groups; - if(LOG.isTraceEnabled()) - LOG.debug("Decompressing with block size: " + blklen); + // final int block = (int) Math.ceil((double) (CompressionSettings.BITMAP_BLOCK_SZ) / nCols); + // final int blklen = Math.max(block, 64); + final int blklen = 32; - sortGroups(filteredGroups, overlapping); + // final int blklen = block > 1000 ? 
block + 1000 - block % 1000 : Math.max(64, block); // check if we are using filtered groups, and if we are not force constV to null if(groups == filteredGroups) constV = null; final double eps = getEps(constV); - if(k == 1) - decompressSingleThread(ret, filteredGroups, nRows, blklen, constV, eps, nonZeros, overlapping); - else - decompressMultiThread(ret, filteredGroups, nRows, blklen, constV, eps, overlapping, k); - if(overlapping) - ret.recomputeNonZeros(); + if(k == 1) { + if(ret.isInSparseFormat()) { + decompressSparseSingleThread(ret, filteredGroups, nRows, blklen); + ret.setNonZeros(nonZeros); + } + else { + decompressDenseSingleThread(ret, filteredGroups, nRows, blklen, constV, eps, nonZeros, overlapping); + ret.setNonZeros(nonZeros == -1 || overlapping ? ret.recomputeNonZeros() : nonZeros); + } + } + else if(ret.isInSparseFormat()) { + decompressSparseMultiThread(ret, filteredGroups, nRows, blklen, k); + ret.setNonZeros(nonZeros); + } + else + decompressDenseMultiThread(ret, filteredGroups, nRows, blklen, constV, eps, overlapping, k); ret.examSparsity(); return ret; @@ -183,33 +205,46 @@ private static MatrixBlock getUncompressedColGroupAndRemoveFromListOfColGroups(L return ret; } - private static void decompressSingleThread(MatrixBlock ret, List filteredGroups, int rlen, int blklen, - double[] constV, double eps, long nonZeros, boolean overlapping) { + private static void decompressSparseSingleThread(MatrixBlock ret, List filteredGroups, int rlen, + int blklen) { + final SparseBlock sb = ret.getSparseBlock(); + for(int i = 0; i < rlen; i += blklen) { + final int rl = i; + final int ru = Math.min(i + blklen, rlen); + for(AColGroup grp : filteredGroups) + grp.decompressToSparseBlock(ret.getSparseBlock(), rl, ru); + for(int j = rl; j < ru; j++) + if(!sb.isEmpty(j)) + sb.sort(j); + } + + } + + private static void decompressDenseSingleThread(MatrixBlock ret, List filteredGroups, int rlen, + int blklen, double[] constV, double eps, long nonZeros, boolean overlapping) { for(int i = 0; i < rlen; i += blklen) { final int rl = i; final int ru = Math.min(i + blklen, rlen); for(AColGroup grp : filteredGroups) - grp.decompressToBlock(ret, rl, ru); + grp.decompressToDenseBlock(ret.getDenseBlock(), rl, ru); if(constV != null && !ret.isInSparseFormat()) addVector(ret, constV, eps, rl, ru); } - ret.setNonZeros(nonZeros == -1 || overlapping ? 
ret.recomputeNonZeros() : nonZeros); } - private static void decompressMultiThread(MatrixBlock ret, List filteredGroups, int rlen, int blklen, + private static void decompressDenseMultiThread(MatrixBlock ret, List filteredGroups, int rlen, int blklen, double[] constV, double eps, boolean overlapping, int k) { try { final ExecutorService pool = CommonThreadPool.get(k); - final ArrayList tasks = new ArrayList<>(); - for(int i = 0; i * blklen < rlen; i++) - tasks.add(new DecompressTask(filteredGroups, ret, eps, i * blklen, Math.min((i + 1) * blklen, rlen), - overlapping, constV)); - List> rtasks = pool.invokeAll(tasks); - pool.shutdown(); + final ArrayList tasks = new ArrayList<>(); + for(int i = 0; i < rlen; i += blklen) + tasks.add( + new DecompressDenseTask(filteredGroups, ret, eps, i, Math.min(i + blklen, rlen), overlapping, constV)); long nnz = 0; - for(Future rt : rtasks) + for(Future rt : pool.invokeAll(tasks)) nnz += rt.get(); + pool.shutdown(); ret.setNonZeros(nnz); } catch(InterruptedException | ExecutionException ex) { @@ -217,23 +252,21 @@ private static void decompressMultiThread(MatrixBlock ret, List filte } } - private static void sortGroups(List groups, boolean overlapping) { - if(overlapping) { - // add a bit of stability in decompression - Comparator comp = Comparator.comparing(x -> effect(x)); - groups.sort(comp); - } - } + private static void decompressSparseMultiThread(MatrixBlock ret, List filteredGroups, int rlen, + int blklen, int k) { + try { + final ExecutorService pool = CommonThreadPool.get(k); + final ArrayList tasks = new ArrayList<>(); + for(int i = 0; i < rlen; i += blklen) + tasks.add(new DecompressSparseTask(filteredGroups, ret, i, Math.min(i + blklen, rlen))); - /** - * Calculate an effect value for a column group. This is used to sort the groups before decompression to decompress - * the columns that have the smallest effect first. - * - * @param x A Group - * @return A Effect double value. - */ - private static double effect(AColGroup x) { - return (x instanceof ColGroupUncompressed) ? 
-Double.MAX_VALUE : -Math.max(x.getMax(), Math.abs(x.getMin())); + for(Future rt : pool.invokeAll(tasks)) + rt.get(); + pool.shutdown(); + } + catch(InterruptedException | ExecutionException ex) { + throw new DMLCompressionException("Parallel decompression failed", ex); + } } /** @@ -259,7 +292,7 @@ private static double getEps(double[] constV) { } } - private static class DecompressTask implements Callable { + private static class DecompressDenseTask implements Callable { private final List _colGroups; private final MatrixBlock _ret; private final double _eps; @@ -268,7 +301,7 @@ private static class DecompressTask implements Callable { private final double[] _constV; private final boolean _overlapping; - protected DecompressTask(List colGroups, MatrixBlock ret, double eps, int rl, int ru, + protected DecompressDenseTask(List colGroups, MatrixBlock ret, double eps, int rl, int ru, boolean overlapping, double[] constV) { _colGroups = colGroups; _ret = ret; @@ -282,7 +315,7 @@ protected DecompressTask(List colGroups, MatrixBlock ret, double eps, @Override public Long call() { for(AColGroup grp : _colGroups) - grp.decompressToBlock(_ret, _rl, _ru); + grp.decompressToDenseBlock(_ret.getDenseBlock(), _rl, _ru); if(_constV != null) addVector(_ret, _constV, _eps, _rl, _ru); @@ -291,6 +324,31 @@ public Long call() { } } + private static class DecompressSparseTask implements Callable { + private final List _colGroups; + private final MatrixBlock _ret; + private final int _rl; + private final int _ru; + + protected DecompressSparseTask(List colGroups, MatrixBlock ret, int rl, int ru) { + _colGroups = colGroups; + _ret = ret; + _rl = rl; + _ru = ru; + } + + @Override + public Object call() { + final SparseBlock sb = _ret.getSparseBlock(); + for(AColGroup grp : _colGroups) + grp.decompressToSparseBlock(_ret.getSparseBlock(), _rl, _ru); + for(int i = _rl; i < _ru; i++) + if(!sb.isEmpty(i)) + sb.sort(i); + return null; + } + } + /** * Add the rowV vector to each row in ret. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java index 919f98a8db6..8b197b3ac3d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java @@ -121,8 +121,8 @@ public static void leftMultByTransposeSelf(CompressedMatrixBlock cmb, MatrixBloc final List groups = cmb.getColGroups(); final int numColumns = cmb.getNumColumns(); final int numRows = cmb.getNumRows(); - final boolean containsSDC = CLALibUtils.containsSDCOrConst(groups); - final double[] constV = containsSDC ? new double[numColumns] : null; + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + final double[] constV = shouldFilter ? new double[numColumns] : null; final List filteredGroups = CLALibUtils.filterGroups(groups, constV); // TODO add parallel again @@ -177,11 +177,11 @@ private static MatrixBlock leftMultByCompressedTransposedMatrix(CompressedMatrix final List rightCG = right.getColGroups(); final List leftCG = left.getColGroups(); - final boolean containsRight = CLALibUtils.containsSDCOrConst(rightCG); + final boolean containsRight = CLALibUtils.shouldPreFilter(rightCG); double[] cR = containsRight ? 
new double[cr] : null; final List fRight = CLALibUtils.filterGroups(rightCG, cR); - final boolean containsLeft = CLALibUtils.containsSDCOrConst(leftCG); + final boolean containsLeft = CLALibUtils.shouldPreFilter(leftCG); double[] cL = containsLeft ? new double[rl] : null; final List fLeft = CLALibUtils.filterGroups(leftCG, cL); @@ -246,11 +246,11 @@ private static MatrixBlock leftMultByMatrix(List colGroups, MatrixBlo } final int numColumnsOut = ret.getNumColumns(); - final boolean containsSDC = CLALibUtils.containsSDCOrConst(colGroups); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(colGroups); final int lr = that.getNumRows(); // a constant colgroup summing the default values. - double[] constV = containsSDC ? new double[numColumnsOut] : null; + double[] constV = shouldFilter ? new double[numColumnsOut] : null; final List filteredGroups = CLALibUtils.filterGroups(colGroups, constV); if(colGroups == filteredGroups) constV = null; @@ -258,9 +258,9 @@ private static MatrixBlock leftMultByMatrix(List colGroups, MatrixBlo if(!filteredGroups.isEmpty()) { if(k == 1) - rowSums = leftMultByMatrixPrimitive(filteredGroups, that, ret, 0, lr, containsSDC ? new double[lr] : null); + rowSums = leftMultByMatrixPrimitive(filteredGroups, that, ret, 0, lr, shouldFilter ? new double[lr] : null); else - rowSums = leftMultByMatrixParallel(filteredGroups, that, ret, containsSDC, overlapping, k); + rowSums = leftMultByMatrixParallel(filteredGroups, that, ret, shouldFilter, overlapping, k); } else if(constV != null) rowSums = that.rowSum(k).getDenseBlockValues(); @@ -412,18 +412,19 @@ private static void leftMultByMatrixPrimitiveSparse(List colGroups, M int rl, int ru, double[] rowSum) { for(int i = rl; i < ru; i++) { + final SparseBlock sb = that.getSparseBlock(); + if(sb.isEmpty(i)) + continue; for(int j = 0; j < colGroups.size(); j++) { colGroups.get(j).leftMultByMatrix(that, ret, i, i + 1); } if(rowSum != null) { - final SparseBlock sb = that.getSparseBlock(); - if(!sb.isEmpty(i)) { - final int apos = sb.pos(i); - final int alen = sb.size(i) + apos; - final double[] aval = sb.values(i); - for(int j = apos; j < alen; j++) - rowSum[i] += aval[j]; - } + final int apos = sb.pos(i); + final int alen = sb.size(i) + apos; + final double[] aval = sb.values(i); + for(int j = apos; j < alen; j++) + rowSum[i] += aval[j]; + } } } @@ -440,8 +441,8 @@ private static void leftMultByMatrixPrimitiveDense(List colGroups, Ma // The number of column groups to process together // the value should ideally be set so that the colGroups fits into cache together with a row block. // currently we only try to avoid having a dangling small number of column groups in the last block. - final int colGroupBlocking = preAggCGs.size() % 16 < 4 ? 20 : 16; - + final int colGroupBlocking = preAggCGs.size();// % 16 < 4 ? 
20 : 16; + // final int colGroupBlocking = 3; // Allocate pre Aggregate Array List final MatrixBlock[] preAgg = populatePreAggregate(colGroupBlocking); @@ -461,27 +462,13 @@ private static void leftMultByMatrixPrimitiveDense(List colGroups, Ma preAgg[j % colGroupBlocking].reset(rowBlockSize, nVals, false); } - int colBlockSize = 32000; - // For each row block for(int h = rl; h < ru; h += rowBlockSize) { - // For each column block final int rowUpper = Math.min(h + rowBlockSize, ru); - for(int i = 0; i < lc; i += colBlockSize) { - final int colUpper = Math.min(i + colBlockSize, lc); - // Pre Aggregate each column group in block - for(int j = g; j < gEnd && j < preAggCGs.size(); j++) { - preAggCGs.get(j).preAggregateDense(that, preAgg[j % colGroupBlocking], h, rowUpper, i, colUpper); - } - if(rowSum != null) { - final double[] thatV = that.getDenseBlockValues(); - for(int r = h; r < rowUpper; r++) { - final int rowOff = r * lc; - for(int c = rowOff + i; c < rowOff + colUpper; c++) - rowSum[r] += thatV[c]; - } - } - } + if(rowSum != null) + preAggregateWithRowSums(that, h, rowUpper, preAggCGs, g, gEnd, preAgg, rowSum); + else + preAggregate(that, h, rowUpper, preAggCGs, g, gEnd, preAgg); // Multiply out the preAggregate to the output matrix. for(int j = g; j < gEnd && j < preAggCGs.size(); j++) { @@ -507,6 +494,42 @@ private static void leftMultByMatrixPrimitiveDense(List colGroups, Ma } } + private static void preAggregateWithRowSums(MatrixBlock that, int rl, int ru, List preAggCGs, int g, + int gEnd, MatrixBlock[] preAgg, double[] rowSum) { + final int lc = that.getNumColumns(); + final int colBlockSize = 25000; + final int colGroupBlocking = preAgg.length; + // For each column block + for(int i = 0; i < lc; i += colBlockSize) { + final int colUpper = Math.min(i + colBlockSize, lc); + // Pre Aggregate each column group in block + for(int j = g; j < gEnd && j < colGroupBlocking; j++) + preAggCGs.get(j).preAggregateDense(that, preAgg[j % colGroupBlocking], rl, ru, i, colUpper); + + final double[] thatV = that.getDenseBlockValues(); + for(int r = rl; r < ru; r++) { + final int rowOff = r * lc; + for(int c = rowOff + i; c < rowOff + colUpper; c++) + rowSum[r] += thatV[c]; + } + + } + } + + private static void preAggregate(MatrixBlock that, int rl, int ru, List preAggCGs, int g, int gEnd, + MatrixBlock[] preAgg) { + + final int lc = that.getNumColumns(); + final int colBlockSize = 25000; + final int colGroupBlocking = preAgg.length; + for(int i = 0; i < lc; i += colBlockSize) { + final int colUpper = Math.min(i + colBlockSize, lc); + // Pre Aggregate each column group in block + for(int j = g; j < gEnd && j < colGroupBlocking; j++) + preAggCGs.get(j).preAggregateDense(that, preAgg[j % colGroupBlocking], rl, ru, i, colUpper); + } + } + private static MatrixBlock[] populatePreAggregate(int colGroupBlocking) { final MatrixBlock[] preAgg = new MatrixBlock[colGroupBlocking]; // populate the preAgg array. 
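The two preAggregate helpers above keep the core CLA left-multiply strategy: first aggregate the dense left-hand row by dictionary code, then multiply the small pre-aggregate with the dictionary values. A minimal standalone sketch of that idea for a single row and a single column group follows; map (the row-to-code mapping) and dict (a row-major dictionary) are simplifications assumed for this sketch, not the patch's API.

public final class PreAggregateSketch {
	// Computes out[j] += sum_r row[r] * dict[map[r]][j] in two phases.
	public static void leftMultRow(double[] row, int[] map, double[] dict, int nGroupCols, double[] out) {
		final int nUnique = dict.length / nGroupCols;
		final double[] preAgg = new double[nUnique];
		// Phase 1: one pass over the dense row, grouping (pre-aggregating) values by dictionary code.
		for(int r = 0; r < row.length; r++)
			preAgg[map[r]] += row[r];
		// Phase 2: nUnique * nGroupCols multiplications instead of row.length * nGroupCols.
		for(int u = 0; u < nUnique; u++)
			for(int j = 0; j < nGroupCols; j++)
				out[j] += preAgg[u] * dict[u * nGroupCols + j];
	}
}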
diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java index 52ad0da3e4d..3ebdd3a00e3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java @@ -92,9 +92,9 @@ private static MatrixBlock rightMultByMatrixOverlapping(CompressedMatrixBlock m1 final List retCg = new ArrayList<>(); final CompressedMatrixBlock ret = new CompressedMatrixBlock(rl, cr); - final boolean containsSDC = CLALibUtils.containsSDCOrConst(colGroups); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(colGroups); - double[] constV = containsSDC ? new double[rr] : null; + double[] constV = shouldFilter ? new double[rr] : null; final List filteredGroups = CLALibUtils.filterGroups(colGroups, constV); if(colGroups == filteredGroups) constV = null; diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSlice.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSlice.java new file mode 100644 index 00000000000..94865036b42 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSlice.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.lib; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class CLALibSlice { + + protected static final Log LOG = LogFactory.getLog(CLALibSlice.class.getName()); + + public static MatrixBlock slice(CompressedMatrixBlock cmb, int rl, int ru, int cl, int cu, boolean deep) { + if(rl == ru && cl == cu) + return sliceSingle(cmb, rl, cl); + else if(rl == 0 && ru == cmb.getNumRows() - 1) + return sliceColumns(cmb, cl, cu); + else if(cl == 0 && cu == cmb.getNumColumns() - 1) + return sliceRows(cmb, rl, ru); + else + return sliceInternal(cmb, rl, ru, cl, cu); + } + + private static MatrixBlock sliceInternal(CompressedMatrixBlock cmb, int rl, int ru, int cl, int cu) { + // In the case where an internal matrix is sliced out, then first slice out the + // columns to an compressed intermediate. + // Then call slice recursively, to do the row slice. + // Since we do not copy the index structure but simply maintain a pointer to the + // original this is fine. 
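+ // For example, slice(cmb, 2, 4, 1, 3): sliceColumns keeps columns 1..3 compressed, and sliceRows then + // decompresses rows 2..4 (both bounds inclusive).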
+ return sliceRows(sliceColumns(cmb, cl, cu), rl, ru); + } + + private static MatrixBlock sliceRows(CompressedMatrixBlock cmb, int rl, int ru) { + final int nCol = cmb.getNumColumns(); + final int rue = ru + 1; + MatrixBlock tmp = new MatrixBlock(rue - rl, nCol, false).allocateDenseBlock(); + DenseBlock db = tmp.getDenseBlock(); + final List groups = cmb.getColGroups(); + final boolean shouldFilter = CLALibUtils.shouldPreFilter(groups); + if(shouldFilter) { + final double[] constV = new double[nCol]; + final List filteredGroups = CLALibUtils.filterGroups(groups, constV); + for(AColGroup g : filteredGroups) + g.decompressToDenseBlock(db, rl, rue, -rl, 0); + AColGroup cRet = ColGroupFactory.genColGroupConst(constV); + cRet.decompressToDenseBlock(db, rl, rue, -rl, 0); + } + else + for(AColGroup g : groups) + g.decompressToDenseBlock(db, rl, rue, -rl, 0); + + tmp.recomputeNonZeros(); + tmp.examSparsity(); + return tmp; + } + + private static MatrixBlock sliceSingle(CompressedMatrixBlock cmb, int row, int col) { + // get a single index, and return in a matrixBlock + MatrixBlock tmp = new MatrixBlock(1, 1, 0); + tmp.appendValue(0, 0, cmb.getValue(row, col)); + return tmp; + } + + private static CompressedMatrixBlock sliceColumns(CompressedMatrixBlock cmb, int cl, int cu) { + final int cue = cu + 1; + final CompressedMatrixBlock ret = new CompressedMatrixBlock(cmb.getNumRows(), cue - cl); + + final List newColGroups = new ArrayList<>(); + for(AColGroup grp : cmb.getColGroups()) { + final AColGroup slice = grp.sliceColumns(cl, cue); + if(slice != null) + newColGroups.add(slice); + } + + ret.allocateColGroupList(newColGroups); + ret.recomputeNonZeros(); + ret.setOverlapping(cmb.isOverlapping()); + return ret; + } + +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java index d6965173600..0141a8d802b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java @@ -23,29 +23,18 @@ import java.util.Arrays; import java.util.List; +import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AMorphingMMColGroup; import org.apache.sysds.runtime.compress.colgroup.ColGroupConst; import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; -import org.apache.sysds.runtime.compress.colgroup.ColGroupSDC; -import org.apache.sysds.runtime.compress.colgroup.ColGroupSDCSingle; public final class CLALibUtils { - // private static final Log LOG = LogFactory.getLog(CLALibUtils.class.getName()); - - /** - * Helper method to determine if the column groups contains SDC - * - * @param groups The ColumnGroups to analyze - * @return A Boolean saying it there is >= 2 SDC Groups. - */ - protected static boolean containsSDC(List groups) { - for(AColGroup g : groups) - if(g instanceof ColGroupSDC || g instanceof ColGroupSDCSingle) - return true; - return false; - } + protected static final Log LOG = LogFactory.getLog(CLALibUtils.class.getName()); /** * Helper method to determine if the column groups contains SDC or Constant groups. 
@@ -53,37 +42,13 @@ protected static boolean containsSDC(List groups) { * @param groups The ColumnGroups to analyze * @return A Boolean saying there is SDC groups or Constant groups. */ - protected static boolean containsSDCOrConst(List groups) { + protected static boolean shouldPreFilter(List groups) { for(AColGroup g : groups) - if(g instanceof ColGroupSDC || g instanceof ColGroupSDCSingle || g instanceof ColGroupConst) + if(g instanceof AMorphingMMColGroup || g instanceof ColGroupConst) return true; return false; } - /** - * Helper method to filter out SDC Groups, to add their common value to the ConstV. This allows exploitation of the - * common values in the SDC Groups. - * - * @param groups The Column Groups - * @param constV The Constant vector to add common values to. - * @return The Filtered list of Column groups containing no SDC Groups but only SDCZero groups. - */ - protected static List filterSDCGroups(List groups, double[] constV) { - if(constV == null) - return groups; - - final List filteredGroups = new ArrayList<>(); - for(AColGroup g : groups) { - if(g instanceof ColGroupSDC) - filteredGroups.add(((ColGroupSDC) g).extractCommon(constV)); - else if(g instanceof ColGroupSDCSingle) - filteredGroups.add(((ColGroupSDCSingle) g).extractCommon(constV)); - else - filteredGroups.add(g); - } - return returnGroupIfFiniteNumbers(groups, filteredGroups, constV); - } - /** * Helper method to filter out SDC Groups and remove all constant groups, to reduce computation. * @@ -97,10 +62,8 @@ protected static List filterGroups(List groups, double[] c final List filteredGroups = new ArrayList<>(); for(AColGroup g : groups) { - if(g instanceof ColGroupSDC) - filteredGroups.add(((ColGroupSDC) g).extractCommon(constV)); - else if(g instanceof ColGroupSDCSingle) - filteredGroups.add(((ColGroupSDCSingle) g).extractCommon(constV)); + if(g instanceof AMorphingMMColGroup) + filteredGroups.add(((AMorphingMMColGroup) g).extractCommon(constV)); else if(g instanceof ColGroupEmpty) continue; else if(g instanceof ColGroupConst) @@ -115,7 +78,8 @@ private static List returnGroupIfFiniteNumbers(List groups double[] constV) { for(double v : constV) if(!Double.isFinite(v)) - return groups; + throw new NotImplementedException(); + // return groups; return filteredGroups; } diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java b/src/test/java/org/apache/sysds/test/TestUtils.java index 125de369696..a0ba5bf418a 100644 --- a/src/test/java/org/apache/sysds/test/TestUtils.java +++ b/src/test/java/org/apache/sysds/test/TestUtils.java @@ -918,7 +918,7 @@ private static void compareMatricesBitAvgDistanceSparse(SparseBlock sbe, SparseB continue; if(sba.size(i) != sbe.size(i)) - fail(message+"\nNumber of values are not equal in row: " + i); + fail(message+"\nNumber of values are not equal in row: " + i +"\nactual:"+ sba.get(i) +"\nexpected:"+ sbe.get(i)); final double[] e = sbe.values(i); final double[] a = sba.values(i); diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java index fee58b97b89..b914dd7a301 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java @@ -171,6 +171,17 @@ public void testUnaryOperators(AggType aggType, boolean inCP) { testUnaryOperators(aggType, auop, inCP); } + @Test + public void testNonZeros() { + if(!(cmb instanceof 
CompressedMatrixBlock)) + return; // Input was not compressed then just pass test + if(!(cmb.getNonZeros() >= mb.getNonZeros())) { + fail(bufferedToString + "\nIncorrect number of non Zeros should guarantee greater than or equals but are " + + cmb.getNonZeros() + " and should be: " + mb.getNonZeros()); + } + + } + @Test public void testSerialization() { try { diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java index 16ca8ad8246..34a800f4262 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java @@ -95,8 +95,8 @@ public abstract class CompressedTestBase extends TestBase { protected static ValueRange[] usedValueRanges = new ValueRange[] {ValueRange.BOOLEAN, ValueRange.SMALL, ValueRange.NEGATIVE}; - protected static OverLapping[] overLapping = new OverLapping[] {OverLapping.PLUS_LARGE, OverLapping.MATRIX, - OverLapping.NONE, OverLapping.APPEND_CONST, OverLapping.C_BIND_SELF}; + protected static OverLapping[] overLapping = new OverLapping[] {OverLapping.PLUS_LARGE, OverLapping.PLUS_ROW_VECTOR, + OverLapping.MATRIX, OverLapping.NONE, OverLapping.APPEND_CONST, OverLapping.C_BIND_SELF}; protected static CompressionSettingsBuilder[] usedCompressionSettings = new CompressionSettingsBuilder[] { // CLA TESTS! @@ -264,11 +264,24 @@ else if(ov == OverLapping.SQUASH) { cmb = ((CompressedMatrixBlock) cmb).squash(_k); } } - if(ov == OverLapping.PLUS || ov == OverLapping.PLUS_LARGE) { - ScalarOperator sop = ov == OverLapping.PLUS_LARGE ? new LeftScalarOperator(Plus.getPlusFnObject(), - -3142151) : new LeftScalarOperator(Plus.getPlusFnObject(), 5); - mb = mb.scalarOperations(sop, new MatrixBlock()); - cmb = cmb.scalarOperations(sop, new MatrixBlock()); + if(cmb instanceof CompressedMatrixBlock) { + + if(ov == OverLapping.PLUS || ov == OverLapping.PLUS_LARGE) { + ScalarOperator sop = ov == OverLapping.PLUS_LARGE ? 
new LeftScalarOperator(Plus.getPlusFnObject(), + -3142151) : new LeftScalarOperator(Plus.getPlusFnObject(), 5); + mb = mb.scalarOperations(sop, new MatrixBlock()); + cmb = cmb.scalarOperations(sop, new MatrixBlock()); + } + else if(ov == OverLapping.PLUS_ROW_VECTOR) { + + MatrixBlock v = TestUtils.generateTestMatrixBlock(1, cols, -1, 1, 1.0, 4); + BinaryOperator bop = new BinaryOperator(Plus.getPlusFnObject(), _k); + mb = mb.binaryOperations(bop, v, null); + cmb = cmb.binaryOperations(bop, v, null); + lossyTolerance = lossyTolerance + 2; + } + if(!(cmb instanceof CompressedMatrixBlock)) + fail("Invalid construction, should result in compressed MatrixBlock"); } } @@ -285,6 +298,7 @@ else if(ov == OverLapping.SQUASH) { matrixRowsCols = null; } TestUtils.assertEqualColsAndRows(mb, cmb, bufferedToString); + } catch(Exception e) { e.printStackTrace(); @@ -375,7 +389,7 @@ public void testDecompress() { try { if(!(cmb instanceof CompressedMatrixBlock)) return; // Input was not compressed then just pass test - + ((CompressedMatrixBlock) cmb).clearSoftReferenceToDecompressed(); MatrixBlock decompressedMatrixBlock = ((CompressedMatrixBlock) cmb).decompress(_k); compareResultMatrices(mb, decompressedMatrixBlock, 1); assertEquals(bufferedToString, mb.getNonZeros(), decompressedMatrixBlock.getNonZeros()); @@ -902,10 +916,13 @@ public void testSlice(int rl, int ru, int cl, int cu) { try { if(!(cmb instanceof CompressedMatrixBlock) || rows * cols > 10000) return; - MatrixBlock ret2 = cmb.slice(rl, ru, cl, cu); - MatrixBlock ret1 = mb.slice(rl, ru, cl, cu); - if(!(ret2 instanceof CompressedMatrixBlock)) - assertEquals(ret1.getNonZeros(), ret2.getNonZeros()); + final MatrixBlock ret2 = cmb.slice(rl, ru, cl, cu); + final MatrixBlock ret1 = mb.slice(rl, ru, cl, cu); + final long nnz1 = ret1.getNonZeros(); + final long nnz2 = ret2.getNonZeros(); + if(!(ret2 instanceof CompressedMatrixBlock) && nnz1 != nnz2) + fail(bufferedToString + "\nNot same number of non zeros " + nnz1 + " != " + nnz2); + compareResultMatrices(ret1, ret2, 1); } catch(Exception e) { diff --git a/src/test/java/org/apache/sysds/test/component/compress/ExtendedMatrixTests.java b/src/test/java/org/apache/sysds/test/component/compress/ExtendedMatrixTests.java index 3c5be85ee96..924f5ef374b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/ExtendedMatrixTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/ExtendedMatrixTests.java @@ -76,12 +76,15 @@ public static Collection data() { SparsityType st = SparsityType.FULL; ValueType vt = ValueType.RLE_COMPRESSIBLE; ValueRange vr = ValueRange.SMALL; - MatrixTypology mt = MatrixTypology.SMALL; + MatrixTypology mt = MatrixTypology.LARGE; OverLapping ov = OverLapping.NONE; for(CompressionSettingsBuilder cs : usedCompressionSettings) tests.add(new Object[] {st, vt, vr, cs, mt, ov, 1, null}); + ov = OverLapping.PLUS_ROW_VECTOR; + for(CompressionSettingsBuilder cs : usedCompressionSettings) + tests.add(new Object[] {st, vt, vr, cs, mt, ov, 1, null}); return tests; } @@ -132,7 +135,7 @@ public void testSum() { else if(OverLapping.effectOnOutput(overlappingType)) assertTrue(bufferedToString, TestUtils.getPercentDistance(ret2, ret1, true) > .99); else - TestUtils.compareScalarBitsJUnit(ret2, ret1, 3, bufferedToString); // Should be exactly same value + TestUtils.compareScalarBitsJUnit(ret2, ret1, 100, bufferedToString); // Should be exactly same value } diff --git a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java 
b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java index a416b547e1f..7341bc5a3d8 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java +++ b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java @@ -34,8 +34,7 @@ public enum ValueType { RAND_ROUND, // Values rounded to nearest whole numbers. OLE_COMPRESSIBLE, // Ideal inputs for OLE Compression. RLE_COMPRESSIBLE, // Ideal inputs for RLE Compression. - ONE_HOT, - UNBALANCED_SPARSE, // An input where some columns are super dense and some very sparse + ONE_HOT, UNBALANCED_SPARSE, // An input where some columns are super dense and some very sparse } public enum MatrixTypology { @@ -55,7 +54,8 @@ public enum ValueRange { } public enum OverLapping { - COL, MATRIX, NONE, MATRIX_PLUS, MATRIX_MULT_NEGATIVE, SQUASH, PLUS, APPEND_EMPTY, APPEND_CONST, PLUS_LARGE, C_BIND_SELF; + COL, MATRIX, NONE, MATRIX_PLUS, MATRIX_MULT_NEGATIVE, SQUASH, PLUS, APPEND_EMPTY, APPEND_CONST, PLUS_LARGE, + C_BIND_SELF, PLUS_ROW_VECTOR; public static boolean effectOnOutput(OverLapping opcode) { switch(opcode) { diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java index 795803f9275..6956e0e37c5 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java @@ -19,16 +19,21 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.fail; + import java.util.ArrayList; import java.util.Collection; import java.util.Random; +import org.apache.commons.lang.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.compress.colgroup.offset.OffsetByte; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.test.TestUtils; import org.junit.Test; @@ -46,6 +51,9 @@ public class MappingPreAggregateTests { public final int size; private AMapToData m; private MapToByte ref; + private AOffset o; + private final MatrixBlock mb; // matrix block to preAggregate from. 
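+ // reference pre-aggregate result, computed once from the MapToByte reference map in the constructor and + // compared against each parameterized map type.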
+ private final double[] preRef; @Parameters public static Collection data() { @@ -67,6 +75,11 @@ public MappingPreAggregateTests(int seed, MAP_TYPE type, int size) { this.type = type; this.size = size; genBitMap(seed); + + mb = TestUtils.generateTestMatrixBlock(2, size, 0, 100, 1.0, seed); + preRef = new double[m.getUnique()]; + o = OneOffset.create(size); + ref.preAggregateDense(mb, preRef, 0, 1, 0, size); } protected AMapToData genBitMap(int seed) { @@ -85,20 +98,69 @@ protected AMapToData genBitMap(int seed) { @Test public void testPreAggregateDense() { - int nUnique = m.getUnique(); - int size = m.size(); + try { + final int size = m.size(); + MatrixBlock mb = TestUtils.generateTestMatrixBlock(1, size, 0, 100, 1.0, seed); + double[] pre = new double[m.getUnique()]; + m.preAggregateDense(mb, pre, 0, 1, 0, size); + TestUtils.compareMatrices(preRef, pre, 0.00001); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } - MatrixBlock mb = TestUtils.generateTestMatrixBlock(1, size, 0, 100, 1.0, seed); - MatrixBlock pre = new MatrixBlock(1, nUnique, false); - pre.allocateDenseBlock(); + @Test + public void testPreAggregateDenseWithIndexes() { + switch(type) { + case BIT: + case INT: + return; + default: + try { + final int size = m.size(); + MatrixBlock mb = TestUtils.generateTestMatrixBlock(1, size, 0, 100, 1.0, seed); + double[] pre = new double[m.getUnique()]; + m.preAggregateDense(mb, pre, 0, 1, 0, size, o); + TestUtils.compareMatrices(preRef, pre, 0.00001); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + } - m.preAggregateDense(mb, pre, 0, 1, 0, 100); + @Test(expected = NotImplementedException.class) + public void testPreAggregateDenseWithIndexesExceptionExpected() { + switch(type) { + case BIT: + case INT: + m.preAggregateDense(mb, null, 0, 1, 0, size, o); + default: + throw new NotImplementedException(); + } + } + + @Test(expected = NotImplementedException.class) + public void testPreAggregateDenseExceptionExpected() { + m.preAggregateDense(mb, null, 0, 2, 0, size); + } - MatrixBlock preRef = new MatrixBlock(1, nUnique, false); - preRef.allocateDenseBlock(); - - ref.preAggregateDense(mb, preRef, 0, 1,0,100); + private static class OneOffset extends OffsetByte { - TestUtils.compareMatrices(preRef, pre, 0, "preaggregate not same with different maps"); + private OneOffset(byte[] offsets, int offsetToFirst, int offsetToLast) { + super(offsets, offsetToFirst, offsetToLast); + } + + protected static OneOffset create(int length) { + int offsetToFirst = 0; + int offsetToLast = length - 1; + byte[] offsets = new byte[length - 1]; + for(int i = 0; i < offsets.length; i++) + offsets[i] = 1; + return new OneOffset(offsets, offsetToFirst, offsetToLast); + } } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index 2bb813831c5..8509d3e46c8 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -64,7 +64,7 @@ public static Collection data() { tests.add(new Object[] {4, t, 63, false}); tests.add(new Object[] {3, t, 64, false}); tests.add(new Object[] {3, t, 65, false}); - tests.add(new Object[] {5, t, 64+63, false}); + tests.add(new Object[] {5, t, 64 + 63, false}); tests.add(new Object[] {5, t, 1234, false}); tests.add(new Object[] {5, t, 13, true}); } @@ -107,6 +107,7 @@ protected static 
diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
index 2bb813831c5..8509d3e46c8 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
@@ -64,7 +64,7 @@ public static Collection<Object[]> data() {
 		tests.add(new Object[] {4, t, 63, false});
 		tests.add(new Object[] {3, t, 64, false});
 		tests.add(new Object[] {3, t, 65, false});
-		tests.add(new Object[] {5, t, 64+63, false});
+		tests.add(new Object[] {5, t, 64 + 63, false});
 		tests.add(new Object[] {5, t, 1234, false});
 		tests.add(new Object[] {5, t, 13, true});
 	}
@@ -107,6 +107,7 @@ protected static AMapToData genMap(AMapToData m, int[] expected, int max, boolea
 		// to make sure that the bit set is actually filled.
 		m.set(size - 1, max);
+		expected[size - 1] = max;
 		return m;
 	}
 
@@ -205,6 +206,32 @@ public void replaceMax() {
 		}
 	}
 
+	@Test
+	public void getCountsWithDefault() {
+		switch(type) {
+			case CHAR:
+			case BIT:
+			case INT:
+				return;
+			default:
+				int[] counts = m.getCounts(new int[m.getUnique() + 1], size + 10);
+				if(10 != counts[m.getUnique()]) {
+					fail("Incorrect count in the default bucket:" + m + "\n" + Arrays.toString(counts));
+				}
+		}
+	}
+
+	@Test
+	public void getCountsNoDefault() {
+		switch(type) {
+			case CHAR:
+			case INT:
+				return;
+			default:
+				m.getCounts(new int[m.getUnique()], size);
+		}
+	}
+
 	@Test
 	public void replaceMin() {
 		int max = m.getUpperBoundValue();
@@ -217,6 +244,17 @@ public void replaceMin() {
 		}
 	}
 
+	@Test
+	public void getUnique() {
+		switch(type) {
+			case INT:
+				return;
+			default:
+				int u = m.getUnique();
+				assertEquals(m.getUpperBoundValue() + 1, u);
+		}
+	}
+
 	@Test
 	public void testInMemorySize() {
 		long inMemorySize = m.getInMemorySize();
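The getCountsWithDefault test above relies on a convention worth spelling out: when counts are requested over more rows than the map covers, the surplus rows land in a single trailing "default" bucket. A hedged sketch of that bookkeeping (assumed semantics, not the real AMapToData.getCounts implementation):

public class CountsSketch {
	// Sketch: per-value counts plus one trailing default bucket for rows
	// beyond the end of the map (assumption matching the test above).
	static int[] getCounts(int[] map, int nUnique, int nRows) {
		int[] counts = new int[nUnique + 1];
		for(int v : map)
			counts[v]++;
		counts[nUnique] = nRows - map.length; // e.g. size + 10 rows -> 10 here
		return counts;
	}

	public static void main(String[] args) {
		int[] counts = getCounts(new int[] {0, 1, 1}, 2, 13);
		System.out.println(counts[2]); // 10 rows fall into the default bucket
	}
}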
diff --git a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetNegativeTests.java b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetNegativeTests.java
deleted file mode 100644
index ebf81a3ce14..00000000000
--- a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetNegativeTests.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.component.compress.offset;
-
-import static org.junit.Assert.fail;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-
-import org.apache.commons.lang.NotImplementedException;
-import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
-import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
-import org.apache.sysds.runtime.compress.colgroup.offset.OffsetByte;
-import org.apache.sysds.runtime.compress.colgroup.offset.OffsetChar;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(value = Parameterized.class)
-public class OffsetNegativeTests {
-
-	private enum TYPE {
-		BYTE, CHAR
-	}
-
-	@Parameterized.Parameter
-	public int[] data;
-	@Parameterized.Parameter(1)
-	public TYPE type;
-
-	@Parameters
-	public static Collection<Object[]> data() {
-		ArrayList<Object[]> tests = new ArrayList<>();
-		// It is assumed that the input is in sorted order, all values are positive and there are no duplicates.
-		for(TYPE t : TYPE.values()) {
-			tests.add(new Object[] {new int[] {1, 1,}, t});
-			tests.add(new Object[] {new int[] {2, 2, 2, 2}, t});
-			tests.add(new Object[] {new int[] {1, 2, 3, 4, 5, 5}, t});
-			tests.add(new Object[] {null, t});
-			tests.add(new Object[] {new int[] {}, t});
-
-		}
-		return tests;
-	}
-
-	@Test(expected = Exception.class)
-	public void testConstruction() {
-		switch(type) {
-			case BYTE:
-				testConstruction(new OffsetByte(data));
-				break;
-			case CHAR:
-				testConstruction(new OffsetChar(data));
-				break;
-			default:
-				throw new NotImplementedException("not implemented");
-		}
-
-	}
-
-	public void testConstruction(AOffset o) {
-		AIterator i = o.getIterator();
-		for(int j = 0; j < data.length; j++) {
-
-			if(data[j] != i.value())
-				fail("incorrect result using : " + o.getClass().getSimpleName() + " expected: " + Arrays.toString(data)
-					+ " but was :" + o.toString());
-			if(i.hasNext())
-				i.next();
-		}
-	}
-
-}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
index 3fe8393d475..5ec39127e15 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
@@ -19,23 +19,29 @@
 
 package org.apache.sysds.test.component.compress.offset;
 
+import static org.junit.Assert.assertTrue;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
 import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import org.junit.Test;
 
 public class OffsetSingleTests {
 
-	@Test(expected = RuntimeException.class)
-	public void testInvalidSize_01() {
-		OffsetFactory.estimateInMemorySize(-1, 100);
+	@Test
+	public void testEmptyEstimateMemory() {
+		assertTrue(OffsetFactory.estimateInMemorySize(0, 10000) < 10);
 	}
 
-	@Test(expected = RuntimeException.class)
-	public void testInvalidSize_02() {
-		OffsetFactory.estimateInMemorySize(10, -1);
+	@Test(expected = NotImplementedException.class)
+	public void testNotImplementedMultirowAggregationChar() {
+		AOffset a = OffsetFactory.createOffset(new int[] {1, 2, 3, 4, 5});
+		a.preAggregateDenseMap(null, null, 0, 2, 0, 5, -1, (char[]) null);
 	}
 
-	@Test(expected = RuntimeException.class)
-	public void testInvalidCreation() {
-		OffsetFactory.create(new int[] {1, 2, 3, -1});
+	@Test(expected = NotImplementedException.class)
+	public void testNotImplementedMultirowAggregationByte() {
+		AOffset a = OffsetFactory.createOffset(new int[] {1, 2, 3, 4, 5});
+		a.preAggregateDenseMap(null, null, 0, 2, 0, 5, -1, (byte[]) null);
 	}
 }
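Both the removed negative tests and the new pre-aggregation tests operate on the byte/char offset encodings: a first absolute index followed by small positive deltas, exactly what the OneOffset helper earlier builds with all-ones deltas. A sketch of recovering absolute indexes from such an encoding (assumed layout; the real OffsetByte additionally handles gaps larger than a single byte, which this sketch ignores):

public class OffsetDecodeSketch {
	// Sketch: absolute offsets from a first index plus unsigned byte deltas
	// (assumed layout, mirroring the OneOffset(byte[], first, last) helper).
	static int[] decode(int offsetToFirst, byte[] deltas) {
		int[] out = new int[deltas.length + 1];
		out[0] = offsetToFirst;
		for(int i = 0; i < deltas.length; i++)
			out[i + 1] = out[i] + (deltas[i] & 0xFF); // deltas are unsigned
		return out;
	}

	public static void main(String[] args) {
		// all-ones deltas, as in OneOffset.create(4): indexes 0, 1, 2, 3
		System.out.println(java.util.Arrays.toString(decode(0, new byte[] {1, 1, 1})));
	}
}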
diff --git a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestPreAggregate.java b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestPreAggregate.java
new file mode 100644
index 00000000000..f7cec1f1407
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestPreAggregate.java
@@ -0,0 +1,460 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.offset;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.math3.util.Precision;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetByte;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetChar;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class OffsetTestPreAggregate {
+	protected static final Log LOG = LogFactory.getLog(OffsetTestPreAggregate.class.getName());
+
+	private static final double eps = 0.00001;
+
+	private final int[] data;
+	private final AOffset a;
+
+	private final MatrixBlock leftM;
+
+	// Row sums over the offset indexes, one entry per row of leftM.
+	private final double[] s;
+
+	@Parameters
+	public static Collection<Object[]> data() {
+		ArrayList<Object[]> tests = new ArrayList<>();
+		// It is assumed that the input is in sorted order, all values are positive and there are no duplicates.
+		// Note that each test allocates a matrix of two rows and (last offset + 100) columns,
+		// so don't make the offsets too large.
+		for(OFF_TYPE t : OFF_TYPE.values()) {
+			tests.add(new Object[] {new int[] {1, 2}, t});
+			tests.add(new Object[] {new int[] {2, 142}, t});
+			tests.add(new Object[] {new int[] {142, 421}, t});
+			tests.add(new Object[] {new int[] {1, 1023}, t});
+			tests.add(new Object[] {new int[] {1023, 1024}, t});
+			tests.add(new Object[] {new int[] {1023}, t});
+			tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5}, t});
+			tests.add(new Object[] {new int[] {0}, t});
+			tests.add(new Object[] {new int[] {0, 256}, t});
+			tests.add(new Object[] {new int[] {0, 254}, t});
+			tests.add(new Object[] {new int[] {0, 256 * 2}, t});
+			tests.add(new Object[] {new int[] {0, 255 * 2}, t});
+			tests.add(new Object[] {new int[] {0, 254 * 2}, t});
+			tests.add(new Object[] {new int[] {0, 510, 765}, t});
+			tests.add(new Object[] {new int[] {0, 254 * 3}, t});
+			tests.add(new Object[] {new int[] {0, 255, 255 * 2, 255 * 3}, t});
+			tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3}, t});
+			tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3, 255 * 10}, t});
+			tests.add(new Object[] {new int[] {0, 255 * 3}, t});
+			tests.add(new Object[] {new int[] {0, 255 * 4}, t});
+			tests.add(new Object[] {new int[] {0, 256 * 3}, t});
+			tests.add(new Object[] {new int[] {255 * 3, 255 * 5}, t});
+			tests.add(new Object[] {new int[] {0, 1, 2, 3, 255 * 4, 1500}, t});
+			tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5}, t});
+			tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5, 125, 142, 161, 1661, 2314}, t});
+			tests.add(new Object[] {new int[] {51, 4251, Character.MAX_VALUE}, t});
+		}
+		return tests;
+	}
+
+	public OffsetTestPreAggregate(int[] data, OFF_TYPE type) {
+		this.data = data;
+		switch(type) {
+			case BYTE:
+				this.a = new OffsetByte(data);
+				break;
+			case CHAR:
+				this.a = new OffsetChar(data);
+				break;
+			default:
+				throw new NotImplementedException("not implemented");
+		}
+		this.leftM = TestUtils.generateTestMatrixBlock(2, data[data.length - 1] + 100, -1, 100, 1.0, 1342);
+		this.s = sumIndexes();
+	}
+
+	@Test
+	public void testToString() {
+		String obs = getString(a);
+		String vs = Arrays.toString(data);
+		if(!obs.equals(vs))
+			fail("The strings are not equivalent: " + obs + " vs " + vs);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowByte() {
+		preAggMapRowByte(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowByte() {
+		preAggMapRowByte(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowChar() {
+		preAggMapRowChar(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowChar() {
+		preAggMapRowChar(1);
+	}
+
+	private void preAggMapRowChar(int row) {
+		double[] preAV = new double[1];
+		char[] m = new char[data.length];
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+		verifyPreAggMapRowByte(preAV, row);
+	}
+
+	private void preAggMapRowByte(int row) {
+		double[] preAV = new double[1];
+		byte[] m = new byte[data.length];
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+		verifyPreAggMapRowByte(preAV, row);
+	}
+
+	private void verifyPreAggMapRowByte(double[] preAV, int row) {
+		if(preAV[0] != s[row])
+			fail("The preaggregate result is not the sum! : " + preAV[0] + " vs " + s[row]);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowByteAll1() {
+		preAggMapRowByteAll1(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowByteAll1() {
+		preAggMapRowByteAll1(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowCharAll1() {
+		preAggMapRowCharAll1(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowCharAll1() {
+		preAggMapRowCharAll1(1);
+	}
+
+	private void preAggMapRowCharAll1(int row) {
+		double[] preAV = new double[2];
+		char[] m = new char[data.length];
+		fill(m, (char) 1);
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+		verifyPreAggMapRowAllBytes1(preAV, row);
+	}
+
+	private void preAggMapRowByteAll1(int row) {
+		double[] preAV = new double[2];
+		byte[] m = new byte[data.length];
+		fill(m, (byte) 1);
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+		verifyPreAggMapRowAllBytes1(preAV, row);
+	}
+
+	private void verifyPreAggMapRowAllBytes1(double[] preAV, int row) {
+		if(preAV[0] != 0)
+			fail("aggregated to the wrong index");
+		if(preAV[1] != s[row])
+			fail("The preaggregate result is not the sum! : " + preAV[1] + " vs " + s[row]);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowByteOne1() {
+		preAggMapRowByteOne1(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowByteOne1() {
+		preAggMapRowByteOne1(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstRowCharOne1() {
+		preAggMapRowCharOne1(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondRowCharOne1() {
+		preAggMapRowCharOne1(1);
+	}
+
+	private void preAggMapRowCharOne1(int row) {
+		if(data.length > 1) {
+			double[] preAV = new double[2];
+			char[] m = new char[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+			verifyPreAggMapRowOne1(preAV, row);
+		}
+	}
+
+	private void preAggMapRowByteOne1(int row) {
+		if(data.length > 1) {
+			double[] preAV = new double[2];
+			byte[] m = new byte[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, leftM.getNumColumns(), 0, m);
+			verifyPreAggMapRowOne1(preAV, row);
+		}
+	}
+
+	private void verifyPreAggMapRowOne1(double[] preAV, int row) {
+		double v = leftM.getValue(row, data[1]);
+		if(preAV[1] != v)
+			fail("aggregated to the wrong index");
+		if(!Precision.equals(preAV[0], s[row] - v, eps))
+			fail("The preaggregate result is not the sum! : " + preAV[0] + " vs " + (s[row] - v));
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowByte() {
+		preAggMapSubOfRowByte(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowByte() {
+		preAggMapSubOfRowByte(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowChar() {
+		preAggMapSubOfRowChar(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowChar() {
+		preAggMapSubOfRowChar(1);
+	}
+
+	private void preAggMapSubOfRowChar(int row) {
+		if(data.length > 2) {
+			double[] preAV = new double[2];
+			char[] m = new char[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, data[data.length - 1], 0, m);
+			verifyPreAggMapSubOfRow(preAV, row);
+		}
+	}
+
+	private void preAggMapSubOfRowByte(int row) {
+		if(data.length > 2) {
+			double[] preAV = new double[2];
+			byte[] m = new byte[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, data[data.length - 1], 0, m);
+			verifyPreAggMapSubOfRow(preAV, row);
+		}
+	}
+
+	private void verifyPreAggMapSubOfRow(double[] preAV, int row) {
+		double v = leftM.getValue(row, data[1]);
+		double v2 = leftM.getValue(row, data[data.length - 1]);
+		if(preAV[1] != v)
+			fail("aggregated to the wrong index");
+		if(!Precision.equals(preAV[0], s[row] - v - v2, eps))
+			fail("The preaggregate result is not the sum! : " + preAV[0] + " vs " + (s[row] - v - v2));
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowV2Byte() {
+		preAggMapSubOfRowV2Byte(0, 2);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowV2Byte() {
+		preAggMapSubOfRowV2Byte(1, 2);
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowV2Char() {
+		preAggMapSubOfRowV2Char(0, 2);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowV2Char() {
+		preAggMapSubOfRowV2Char(1, 2);
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowV2ByteV2() {
+		preAggMapSubOfRowV2Byte(0, 244);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowV2ByteV2() {
+		preAggMapSubOfRowV2Byte(1, 244);
+	}
+
+	@Test
+	public void preAggByteMapFirstSubOfRowV2CharV2() {
+		preAggMapSubOfRowV2Char(0, 244);
+	}
+
+	@Test
+	public void preAggByteMapSecondSubOfRowV2CharV2() {
+		preAggMapSubOfRowV2Char(1, 244);
+	}
+
+	private void preAggMapSubOfRowV2Char(int row, int nVal) {
+		if(data.length > 3) {
+			double[] preAV = new double[2];
+			char[] m = new char[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, data[data.length - 2], nVal, m);
+			verifyPreAggMapSubOfRowV2Byte(preAV, row);
+		}
+	}
+
+	private void preAggMapSubOfRowV2Byte(int row, int nVal) {
+		if(data.length > 3) {
+			double[] preAV = new double[2];
+			byte[] m = new byte[data.length];
+			m[1] = 1;
+			a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, 0, data[data.length - 2], nVal, m);
+			verifyPreAggMapSubOfRowV2Byte(preAV, row);
+		}
+	}
+
+	private void verifyPreAggMapSubOfRowV2Byte(double[] preAV, int row) {
+		double v = leftM.getValue(row, data[1]);
+		double v2 = leftM.getValue(row, data[data.length - 1]) + leftM.getValue(row, data[data.length - 2]);
+		if(preAV[1] != v)
+			fail("aggregated to the wrong index");
+		if(!Precision.equals(preAV[0], s[row] - v - v2, eps))
+			fail("The preaggregate result is not the sum! : " + preAV[0] + " vs " + (s[row] - v - v2));
+	}
+
+	@Test
+	public void preAggByteMapFirstOutOfRangeBeforeByte() {
+		preAggMapOutOfRangeBeforeByte(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondOutOfRangeBeforeByte() {
+		preAggMapOutOfRangeBeforeByte(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstOutOfRangeBeforeChar() {
+		preAggMapOutOfRangeBeforeChar(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondOutOfRangeBeforeChar() {
+		preAggMapOutOfRangeBeforeChar(1);
+	}
+
+	private void preAggMapOutOfRangeBeforeChar(int row) {
+		double[] preAV = null; // the aggregate target must never be accessed for an out-of-range request, so null is passed here.
+		char[] m = new char[data.length];
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, -412, data[0] - 1, 0, m);
+	}
+
+	private void preAggMapOutOfRangeBeforeByte(int row) {
+		double[] preAV = null; // the aggregate target must never be accessed for an out-of-range request, so null is passed here.
+		byte[] m = new byte[data.length];
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, -412, data[0] - 1, 0, m);
+	}
+
+	@Test
+	public void preAggByteMapFirstOutOfRangeAfterByte() {
+		preAggMapOutOfRangeAfterByte(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondOutOfRangeAfterByte() {
+		preAggMapOutOfRangeAfterByte(1);
+	}
+
+	@Test
+	public void preAggByteMapFirstOutOfRangeAfterChar() {
+		preAggMapOutOfRangeAfterChar(0);
+	}
+
+	@Test
+	public void preAggByteMapSecondOutOfRangeAfterChar() {
+		preAggMapOutOfRangeAfterChar(1);
+	}
+
+	private void preAggMapOutOfRangeAfterChar(int row) {
+		double[] preAV = null; // the aggregate target must never be accessed for an out-of-range request, so null is passed here.
+		char[] m = new char[data.length];
+		int id = data[data.length - 1] + 10;
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, id, id + 10, 0, m);
+	}
+
+	private void preAggMapOutOfRangeAfterByte(int row) {
+		double[] preAV = null; // the aggregate target must never be accessed for an out-of-range request, so null is passed here.
+		byte[] m = new byte[data.length];
+		int id = data[data.length - 1] + 10;
+		a.preAggregateDenseMap(this.leftM, preAV, row, 1 + row, id, id + 10, 0, m);
+	}
+
+	private final double[] sumIndexes() {
+		double[] lmv = leftM.getDenseBlockValues();
+		double[] ret = new double[leftM.getNumRows()];
+		for(int j = 0; j < leftM.getNumRows(); j++) {
+			final int off = j * leftM.getNumColumns();
+			for(int i = 0; i < data.length; i++)
+				ret[j] += lmv[data[i] + off];
+		}
+		return ret;
+	}
+
+	private final void fill(byte[] a, byte v) {
+		for(int i = 0; i < a.length; i++)
+			a[i] = v;
+	}
+
+	private final void fill(char[] a, char v) {
+		for(int i = 0; i < a.length; i++)
+			a[i] = v;
+	}
+
+	private String getString(AOffset a) {
+		String os = a.toString();
+		return os.substring(os.indexOf("["), os.length());
+	}
+
+}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
index 0fca9742217..a7c03284143 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
@@ -20,6 +20,8 @@ package org.apache.sysds.test.component.compress.offset;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -33,6 +35,8 @@ import java.util.Collection;
 
 import org.apache.commons.lang.NotImplementedException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
@@ -47,8 +51,9 @@
 
 @RunWith(value = Parameterized.class)
 public class OffsetTests {
+	protected static final Log LOG = LogFactory.getLog(OffsetTests.class.getName());
 
-	private static final long sizeTolerance = 265;
+	private static final long sizeTolerance = 100;
 
 	public int[] data;
 	public OFF_TYPE type;
@@ -72,17 +77,25 @@ public static Collection<Object[]> data() {
 		tests.add(new Object[] {new int[] {0, 256}, t});
 		tests.add(new Object[] {new int[] {0, 254}, t});
 		tests.add(new Object[] {new int[] {0, Character.MAX_VALUE}, t});
+		tests.add(new Object[] {new int[] {0, Character.MAX_VALUE, ((int) Character.MAX_VALUE) * 2}, t});
+		tests.add(new Object[] {new int[] {2, Character.MAX_VALUE + 2}, t});
 		tests.add(new Object[] {new int[] {0, ((int) Character.MAX_VALUE) + 1}, t});
 		tests.add(new Object[] {new int[] {0, ((int) Character.MAX_VALUE) - 1}, t});
 		tests.add(new Object[] {new int[] {0, 256 * 2}, t});
 		tests.add(new Object[] {new int[] {0, 255 * 2}, t});
 		tests.add(new Object[] {new int[] {0, 254 * 2}, t});
+		tests.add(new Object[] {new int[] {0, 510, 765}, t});
 		tests.add(new Object[] {new int[] {0, 254 * 3}, t});
+		tests.add(new Object[] {new int[] {0, 255, 255 * 2, 255 * 3}, t});
+		tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3}, t});
+		tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3, 255 * 10}, t});
 		tests.add(new Object[] {new int[] {0, 255 * 3}, t});
+		tests.add(new Object[] {new int[] {0, 255 * 4}, t});
 		tests.add(new Object[] {new int[] {0, 256 * 3}, t});
 		tests.add(new Object[] {new int[] {255 * 3, 255 * 5}, t});
 		tests.add(new Object[] {new int[] {1000000, 1000000 + 255 * 5}, t});
 		tests.add(new Object[] {new int[] {100000000, 100000000 + 255 * 5}, t});
+		tests.add(new Object[] {new int[] {100000000, 100001275, 100001530}, t});
 		tests.add(new Object[] {new int[] {0, 1, 2, 3, 255 * 4, 1500}, t});
 		tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5}, t});
 		tests.add(new Object[] {new int[] {2458248, 2458249, 2458253, 2458254, 2458256, 2458257, 2458258, 2458262,
@@ -109,8 +122,7 @@ public OffsetTests(int[] data, OFF_TYPE type) {
 	@Test
 	public void testConstruction() {
 		try {
-			AIterator i = o.getIterator();
-			compare(i, data);
+			compare(o, data);
 		}
 		catch(Exception e) {
 			e.printStackTrace();
@@ -118,6 +130,30 @@ public void testConstruction() {
 		}
 	}
 
+	@Test
+	public void testCacheExists() {
+		if(data.length > 2) {
+			AIterator i = o.getIterator();
+			i.next();
+			o.cacheIterator(i, data[1]);
+			AIterator ii = o.getIterator(data[1]);
+			assertTrue(ii.equals(i));
+			ii.next();
+			assertFalse(ii.equals(i));
+		}
+	}
+
+	@Test
+	public void testCacheDontExists() {
+		if(data.length > 2) {
+			AIterator i = o.getIterator();
+			i.next();
+			o.cacheIterator(i, data[1]);
+			AIterator ii = o.getIterator(data[2]);
+			assertFalse(ii.equals(i));
+		}
+	}
+
 	@Test
 	public void testSerialization() {
 		try {
@@ -131,9 +167,7 @@ public void testSerialization() {
 			DataInputStream fis = new DataInputStream(bis);
 
 			AOffset n = OffsetFactory.readIn(fis);
-
-			AIterator i = n.getIterator();
-			compare(i, data);
+			compare(n, data);
 		}
 		catch(IOException e) {
 			throw new RuntimeException("Error in io", e);
@@ -170,23 +204,25 @@ public void testOnDiskSizeInBytes() {
 	}
 
 	@Test
-	public void testInMemoryEstimateIsSameAsActualOrSmaller() {
+	public void testInMemoryEstimateIsSameAsActualOrLarger() {
 		try {
-			long inMemorySize = o.getInMemorySize();
+			final long inMemorySize = o.getInMemorySize();
 			long estimatedSize;
 			switch(type) {
 				case BYTE:
-					estimatedSize = OffsetByte.getInMemorySize(data.length);
+					estimatedSize = OffsetByte.estimateInMemorySize(data.length, data[data.length - 1] - data[0]);
 					break;
 				case CHAR:
-					estimatedSize = OffsetChar.getInMemorySize(data.length);
+					estimatedSize = OffsetChar.estimateInMemorySize(data.length, data[data.length - 1] - data[0]);
 					break;
 				default:
 					throw new DMLCompressionException("Unknown input");
 			}
-			final String errorMessage = "in memory size: " + inMemorySize + " is not smaller than estimate: "
-				+ estimatedSize + " with tolerance " + sizeTolerance;
-			assertTrue(errorMessage, inMemorySize - sizeTolerance <= estimatedSize);
+			if(!(inMemorySize <= estimatedSize + sizeTolerance)) {
+				fail("in memory size: " + inMemorySize + " is larger than the estimate: " + estimatedSize
+					+ " with tolerance " + sizeTolerance);
+			}
 		}
 		catch(Exception e) {
 			e.printStackTrace();
@@ -194,15 +230,174 @@ public void testInMemoryEstimateIsSameAsActualOrSmaller() {
 		}
 	}
 
-	private void compare(AIterator i, int[] v) {
-		for(int j = 0; j < v.length; j++) {
+	@Test
+	public void testSkipToContainedIndex() {
+		try {
+			assertEquals(data[data.length - 1], o.getIterator().skipTo(data[data.length - 1]));
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail("Failed skipping to last index");
+		}
+	}
+
+	@Test
+	public void testSkipToContainedIndexPlusOne() {
+		try {
+			assertNotEquals(data[data.length - 1] + 1, o.getIterator().skipTo(data[data.length - 1]));
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail("Failed skipping to last index");
+		}
+	}
+
+	@Test
+	public void testSkipToContainedIndexPlusN() {
+		try {
+			if(data.length > 1)
+				assertTrue(data[1] <= o.getIterator().skipTo(data[1] + 1));
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail("Failed skipping past a contained index");
+		}
+	}
+
+	@Test
+	public void testSkipToContainedIndexMinusOne() {
+		try {
+			int v = data[data.length - 1];
+			int maxDiff = 1;
+			assertTrue(v <= o.getIterator().skipTo(v - 1) + maxDiff);
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail("Failed skipping to last index");
+		}
+	}
+
+	@Test
+	public void testSkipToContainedIndexMinusN() {
+		try {
+			int v = data[data.length - 1];
+			int maxDiff = 142;
+			assertTrue(v <= o.getIterator().skipTo(v - 1) + maxDiff);
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail("Failed skipping to last index");
+		}
+	}
+
+	@Test
+	public void testToString() {
+		String os = o.toString();
+		os = os.substring(os.indexOf("["), os.length());
+		String vs = Arrays.toString(data);
+		if(!os.equals(vs)) {
+			fail("The two array strings are not equivalent with " + type + "\n" + os + " : " + vs);
+		}
+	}
+
+	@Test
+	public void testIsNotOverFirstDataPoint() {
+		assertFalse(o.getIterator().isNotOver(data[0]));
+	}
+
+	@Test
+	public void testIsNotOverSecondDataPointOnInit() {
+		if(data.length > 1)
+			assertTrue(o.getIterator().isNotOver(data[1]));
+	}
+
+	@Test
+	public void testIsNotOverSecondDataPointOnInitToSecond() {
+		if(data.length > 1)
+			assertFalse(o.getIterator(data[1]).isNotOver(data[1]));
+	}
+
+	@Test
+	public void testIsOverFirstDataPointOnInitToSecond() {
+		if(data.length > 1)
+			assertFalse(o.getIterator(data[1]).isNotOver(data[0]));
+	}
+
+	@Test
+	public void testGetDataIndexOnInit() {
+		assertTrue(o.getIterator().getDataIndex() == 0);
+	}
+
+	@Test
+	public void testGetDataIndexOnInitSkipToFirst() {
+		if(data.length > 1)
+			assertTrue(o.getIterator(data[1]).getDataIndex() == 1);
+	}
+
+	@Test
+	public void testGetDataIndexOnInitSkipToN() {
+		if(data.length > 3)
+			assertTrue(o.getIterator(data[2]).getDataIndex() == 2);
+	}
+
+	@Test
+	public void testGetDataAfterNext() {
+		if(data.length > 1)
+			testGetDataAfterNextN(o.getIterator());
+	}
+
+	@Test
+	public void testGetDataAfterNext2() {
+		if(data.length > 2)
+			testGetDataAfterNextN(o.getIterator(2));
+	}
+
+	public void testGetDataAfterNextN(AIterator it) {
+		int d = it.getDataIndex();
+		it.next();
+		assertEquals(d + 1, it.getDataIndex());
+	}
+
+	@Test
+	public void testGetDataAfterNextComb() {
+		if(data.length > 1)
+			testGetDataAfterNextCombN(o.getIterator());
+	}
+
+	@Test
+	public void testGetDataAfterNextComb2() {
+		if(data.length > 2)
+			testGetDataAfterNextCombN(o.getIterator(2));
+	}
+
+	public void testGetDataAfterNextCombN(AIterator it) {
+		int d = it.getDataIndexAndIncrement();
+		assertEquals(d + 1, it.getDataIndex());
+	}
+
+	@Test
+	public void testGetUnreasonablyHighSkip() {
+		assertTrue(o.getIterator(Integer.MAX_VALUE - 1000) == null);
+	}
+
+	@Test
+	public void testCacheNullIterator() {
+		o.cacheIterator(null, 21415);
+	}
+
+	protected static void compare(AOffset o, int[] v) {
+		AIterator i = o.getIterator();
+		if(v[0] != i.value())
+			fail("incorrect result using : " + o.getClass().getSimpleName() + " expected: " + Arrays.toString(v)
+				+ " but was :" + o.toString());
+		for(int j = 1; j < v.length; j++) {
+			i.next();
 			if(v[j] != i.value())
 				fail("incorrect result using : " + o.getClass().getSimpleName() + " expected: " + Arrays.toString(v)
 					+ " but was :" + o.toString());
-			if(i.hasNext())
-				i.next();
 		}
+		if(i.getOffsetsIndex() != o.getOffsetsLength())
+			fail("The allocated offsets are longer than needed: idx " + i.getOffsetsIndex() + " vs len "
+				+ o.getOffsetsLength() + "\n" + Arrays.toString(v));
 	}
-}
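The skipTo assertions above deliberately pin down only loose bounds; all of them are satisfied if skipTo(k) advances to the first stored offset at or after k, parking on the last offset otherwise. A sketch under that assumption (the real AIterator also updates its data index and may consult cached iterators):

public class SkipToSketch {
	// Sketch: return the first stored offset >= k, clamped to the last offset
	// (an assumption consistent with the assertions, not the exact AIterator code).
	static int skipTo(int[] offsets, int k) {
		int j = 0;
		while(j < offsets.length - 1 && offsets[j] < k)
			j++;
		return offsets[j];
	}

	public static void main(String[] args) {
		int[] offsets = {0, 510, 765};
		System.out.println(skipTo(offsets, 765)); // 765, as testSkipToContainedIndex expects
		System.out.println(skipTo(offsets, 511)); // 765, within the tolerated bounds
	}
}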
diff --git a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
new file mode 100644
index 00000000000..ea9017df549
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.offset;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class OffsetTestsDefaultConstructor {
+	protected static final Log LOG = LogFactory.getLog(OffsetTestsDefaultConstructor.class.getName());
+
+	private static final long sizeTolerance = 100;
+
+	public int[] data;
+	private AOffset o;
+
+	@Parameters
+	public static Collection<Object[]> data() {
+		ArrayList<Object[]> tests = new ArrayList<>();
+		// It is assumed that the input is in sorted order, all values are positive and there are no duplicates.
+
+		tests.add(new Object[] {new int[] {1, 2}});
+		tests.add(new Object[] {new int[] {2, 142}});
+		tests.add(new Object[] {new int[] {142, 421}});
+		tests.add(new Object[] {new int[] {1, 1023}});
+		tests.add(new Object[] {new int[] {1023, 1024}});
+		tests.add(new Object[] {new int[] {1023}});
+		tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5}});
+		tests.add(new Object[] {new int[] {0}});
+		tests.add(new Object[] {new int[] {Character.MAX_VALUE, ((int) Character.MAX_VALUE) + 1}});
+		tests.add(new Object[] {new int[] {Character.MAX_VALUE, ((int) Character.MAX_VALUE) * 2}});
+		tests.add(new Object[] {new int[] {0, 256}});
+		tests.add(new Object[] {new int[] {0, 254}});
+		tests.add(new Object[] {new int[] {0, Character.MAX_VALUE}});
+		tests.add(new Object[] {new int[] {0, Character.MAX_VALUE, ((int) Character.MAX_VALUE) * 2}});
+		tests.add(new Object[] {new int[] {2, Character.MAX_VALUE + 2}});
+		tests.add(new Object[] {new int[] {0, ((int) Character.MAX_VALUE) + 1}});
+		tests.add(new Object[] {new int[] {0, ((int) Character.MAX_VALUE) - 1}});
+		tests.add(new Object[] {new int[] {0, 256 * 2}});
+		tests.add(new Object[] {new int[] {0, 255 * 2}});
+		tests.add(new Object[] {new int[] {0, 254 * 2}});
+		tests.add(new Object[] {new int[] {0, 510, 765}});
+		tests.add(new Object[] {new int[] {0, 120, 230}});
+		tests.add(new Object[] {new int[] {1000, 1120, 1230}});
+		tests.add(new Object[] {new int[] {0, 254 * 3}});
+		tests.add(new Object[] {new int[] {0, 255, 255 * 2, 255 * 3}});
+		tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3}});
+		tests.add(new Object[] {new int[] {0, 255 * 2, 255 * 3, 255 * 10}});
+		tests.add(new Object[] {new int[] {0, 255 * 3}});
+		tests.add(new Object[] {new int[] {0, 255 * 4}});
+		tests.add(new Object[] {new int[] {0, 256 * 3}});
+		tests.add(new Object[] {new int[] {255 * 3, 255 * 5}});
+		tests.add(new Object[] {new int[] {1000000, 1000000 + 255 * 5}});
+		tests.add(new Object[] {new int[] {100000000, 100000000 + 255 * 5}});
+		tests.add(new Object[] {new int[] {100000000, 100001275, 100001530}});
+		tests.add(new Object[] {new int[] {0, 1, 2, 3, 255 * 4, 1500}});
+		tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5}});
+		tests.add(new Object[] {new int[] {2458248, 2458249, 2458253, 2458254, 2458256, 2458257, 2458258, 2458262,
+			2458264, 2458266, 2458267, 2458271, 2458272, 2458275, 2458276, 2458281}});
+
+		return tests;
+	}
+
+	public OffsetTestsDefaultConstructor(int[] data) {
+		this.data = data;
+		this.o = OffsetFactory.createOffset(data);
+	}
+
+	@Test
+	public void testConstruction() {
+		try {
+			OffsetTests.compare(o, data);
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw e;
+		}
+	}
+
+	@Test
+	public void testMemoryEstimate() {
+		final long est = OffsetFactory.estimateInMemorySize(data.length, data[data.length - 1]);
+		final long act = o.getInMemorySize();
+
+		if(!(act <= est + sizeTolerance))
+			fail("In-memory size " + act + " is not within tolerance of the estimate " + est);
+	}
+}
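As a short usage sketch of the estimate-before-construct pattern this class exercises, using the same factory calls as testMemoryEstimate above (the tests treat the estimate as an upper bound up to a small tolerance):

import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;

public class OffsetEstimateExample {
	public static void main(String[] args) {
		int[] idx = {0, 510, 765};
		// estimate from (count, largest offset) before materializing anything
		long est = OffsetFactory.estimateInMemorySize(idx.length, idx[idx.length - 1]);
		AOffset off = OffsetFactory.createOffset(idx);
		long act = off.getInMemorySize();
		System.out.println("estimate=" + est + " actual=" + act); // act <= est + tolerance
	}
}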