Skip to content

Commit

Permalink
[SYSTEMDS-3589] Frame single column ragged array
Browse files Browse the repository at this point in the history
This commit adds a simple ragged array that allows us
to allocate columns in frames with a lower number of materialized
values.

Closes #1857
Closes #1884
  • Loading branch information
OlgaOvcharenko authored and Baunsgaard committed Aug 21, 2023
1 parent a54f513 commit f094eac
Show file tree
Hide file tree
Showing 8 changed files with 346 additions and 120 deletions.
Expand Up @@ -861,7 +861,7 @@ private double arraysSizeInMemory() {
try {
size += pool.submit(() -> {
return Arrays.stream(_coldata).parallel() // parallel columns
.map(x -> x.getInMemorySize()).reduce(0L, Long::sum);
.map(x ->x.getInMemorySize()).reduce(0L, (a,x) -> a + x);
}).get();
}
catch(InterruptedException | ExecutionException e) {
Expand Down
Expand Up @@ -147,8 +147,8 @@ public final int size() {
/**
* Get the value at a given index.
*
* This method returns objects that have a high overhead in allocation. Therefore it is not as efficient as using the
* vectorized operations specified in the object.
* This method returns objects that have a high overhead in allocation. Therefore it is not as efficient as using
* the vectorized operations specified in the object.
*
* @param index The index to query
* @return The value returned as an object
Expand All @@ -168,8 +168,24 @@ public final int size() {
*/
public abstract Object get();

/**
* Get the value at the given index as a double.
*
* Returns 0 in case of null.
*
* @param i The index to get the value from
* @return The value at index i as a double, or 0 if the value is null
*/
public abstract double getAsDouble(int i);

/**
* Get the value at the given index as a double, where null is intended to be reported as Double.NaN.
*
* This default implementation only forwards to {@link #getAsDouble(int)} and therefore returns whatever that
* method returns. NOTE(review): array types that can contain null presumably override this method to return
* Double.NaN -- confirm in the subclasses.
*
* @param i The index to get the value from
* @return The value at index i as a double
*/
public double getAsNaNDouble(int i) {
	// No null handling here; the conversion is delegated entirely to getAsDouble.
	final double value = getAsDouble(i);
	return value;
}
Expand Down Expand Up @@ -644,7 +660,7 @@ public ArrayCompressionStatistics statistics(int nSamples) {

if(ddcSize < memSize)
return new ArrayCompressionStatistics(memSizePerElement, //
estDistinct, true, getValueType(),FrameArrayType.DDC, memSize, ddcSize);
estDistinct, true, getValueType(), FrameArrayType.DDC, memSize, ddcSize);

return null;
}
Expand Down
Expand Up @@ -35,7 +35,7 @@ public interface ArrayFactory {
public final static int bitSetSwitchPoint = 64;

public enum FrameArrayType {
STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER, OPTIONAL, DDC;
STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER, RAGGED, OPTIONAL, DDC;
}

public static StringArray create(String[] col) {
Expand Down Expand Up @@ -74,6 +74,10 @@ public static <T> OptionalArray<T> create(T[] col) {
return new OptionalArray<>(col);
}

/**
 * Create a RaggedArray from the given values.
 *
 * @param <T> The type of the values in the array
 * @param col The values handed to the RaggedArray constructor
 * @param m   Passed unchanged to the RaggedArray constructor; presumably the logical number of rows of the
 *            column -- TODO confirm against RaggedArray
 * @return A newly constructed RaggedArray
 */
public static <T> RaggedArray<T> create(T[] col, int m) {
	final RaggedArray<T> ragged = new RaggedArray<>(col, m);
	return ragged;
}

public static long getInMemorySize(ValueType type, int _numRows, boolean containsNull) {
if(containsNull) {
switch(type) {
Expand Down Expand Up @@ -216,6 +220,8 @@ public static Array<?> read(DataInput in, int nRow) throws IOException {
case CHARACTER:
arr = new CharArray(new char[nRow]);
break;
case RAGGED:
return RaggedArray.readRagged(in, nRow);
case OPTIONAL:
return OptionalArray.readOpt(in, nRow);
case DDC:
Expand Down
Expand Up @@ -185,7 +185,7 @@ public FrameArrayType getFrameArrayType() {

@Override
public long getExactSerializedSize() {
return 1 + 2 * _data.length;
return 1L + 2L * _data.length;
}

@Override
Expand Down
Expand Up @@ -459,9 +459,9 @@ public boolean equals(Array<T> other) {
@Override
public String toString() {
StringBuilder sb = new StringBuilder(_size + 2);
sb.append(super.toString() + "<" + _a.getValueType() + ">:[");
sb.append(super.toString()).append("<").append(_a.getClass().getSimpleName()).append(">:[");
for(int i = 0; i < _size - 1; i++)
sb.append(get(i) + ",");
sb.append(get(i)).append(",");
sb.append(get(_size - 1));
sb.append("]");
return sb.toString();
Expand Down

0 comments on commit f094eac

Please sign in to comment.