-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* preliminary refactoring - extracting index sort algorithms into a standalone IndexSorter
- Loading branch information
Showing
5 changed files
with
146 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
dflib/src/main/java/com/nhl/dflib/row/DataFrameRowProxy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package com.nhl.dflib.row; | ||
|
||
import com.nhl.dflib.DataFrame; | ||
import com.nhl.dflib.Index; | ||
|
||
public class DataFrameRowProxy implements RowProxy { | ||
|
||
private DataFrame dataFrame; | ||
private int rowIndex; | ||
private int height; | ||
|
||
public DataFrameRowProxy(DataFrame dataFrame) { | ||
this.dataFrame = dataFrame; | ||
this.height = height; | ||
this.rowIndex = -1; | ||
} | ||
|
||
@Override | ||
public Index getIndex() { | ||
return dataFrame.getColumnsIndex(); | ||
} | ||
|
||
@Override | ||
public Object get(int columnPos) { | ||
return dataFrame.getColumn(columnPos).get(rowIndex); | ||
} | ||
|
||
@Override | ||
public Object get(String columnName) { | ||
return dataFrame.getColumn(columnName).get(rowIndex); | ||
} | ||
|
||
@Override | ||
public void copyRange(RowBuilder to, int fromOffset, int toOffset, int len) { | ||
// row can be missing in joins... | ||
if (rowIndex >= 0) { | ||
int w = dataFrame.width(); | ||
for (int i = 0; i < w; i++) { | ||
to.set(i + toOffset, dataFrame.getColumn(i).get(rowIndex)); | ||
} | ||
} | ||
} | ||
|
||
public boolean hasNext() { | ||
return rowIndex + 1 < height; | ||
} | ||
|
||
public DataFrameRowProxy rewind() { | ||
this.rowIndex++; | ||
return this; | ||
} | ||
|
||
public DataFrameRowProxy rewind(int index) { | ||
this.rowIndex = index; | ||
return this; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package com.nhl.dflib.sort; | ||
|
||
import com.nhl.dflib.ColumnDataFrame; | ||
import com.nhl.dflib.DataFrame; | ||
import com.nhl.dflib.Series; | ||
import com.nhl.dflib.row.DataFrameRowProxy; | ||
import com.nhl.dflib.row.RowProxy; | ||
import com.nhl.dflib.series.ArraySeries; | ||
import com.nhl.dflib.series.IndexedSeries; | ||
|
||
import java.util.Arrays; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.function.Supplier; | ||
|
||
public class IndexSorter { | ||
|
||
private DataFrame dataFrame; | ||
private Supplier<Integer[]> indexBuilder; | ||
|
||
public IndexSorter(DataFrame dataFrame) { | ||
this.dataFrame = dataFrame; | ||
this.indexBuilder = () -> rowNumberSequence(dataFrame.height()); | ||
} | ||
|
||
public IndexSorter(DataFrame dataFrame, List<Integer> rangeToSort) { | ||
this.dataFrame = dataFrame; | ||
// copy range to avoid modification of the source list | ||
this.indexBuilder = () -> rangeToSort.toArray(new Integer[rangeToSort.size()]); | ||
} | ||
|
||
protected static Integer[] rowNumberSequence(int h) { | ||
Integer[] rn = new Integer[h]; | ||
for (int i = 0; i < h; i++) { | ||
rn[i] = i; | ||
} | ||
|
||
return rn; | ||
} | ||
|
||
public DataFrame sort(Comparator<RowProxy> comparator) { | ||
|
||
// make sure 'mutableIndex' is not visible outside this method as we are going to modify it, | ||
// so obtain it via the supplier right on the spot | ||
Integer[] mutableIndex = indexBuilder.get(); | ||
|
||
Comparator<Integer> rowComparator = rowIndexComparator(comparator); | ||
|
||
// note - mutating passed index | ||
Arrays.sort(mutableIndex, rowComparator); | ||
Series<Integer> sortedIndex = new ArraySeries<>(mutableIndex); | ||
|
||
int width = dataFrame.width(); | ||
Series<?>[] newColumnsData = new Series[width]; | ||
for (int i = 0; i < width; i++) { | ||
newColumnsData[i] = new IndexedSeries<>(dataFrame.getColumn(i), sortedIndex); | ||
} | ||
|
||
return new ColumnDataFrame(dataFrame.getColumnsIndex(), newColumnsData); | ||
} | ||
|
||
private Comparator<Integer> rowIndexComparator(Comparator<RowProxy> rowComparator) { | ||
DataFrameRowProxy p1 = new DataFrameRowProxy(dataFrame); | ||
DataFrameRowProxy p2 = new DataFrameRowProxy(dataFrame); | ||
return (i1, i2) -> rowComparator.compare(p1.rewind(i1), p2.rewind(i2)); | ||
} | ||
} |
This file was deleted.
Oops, something went wrong.