Skip to content

Commit

Permalink
- Rebasing to master
Browse files Browse the repository at this point in the history
 - Using a set instead of bitmap for keeping track of null columns
  • Loading branch information
icefury71 committed Oct 18, 2019
1 parent a9793f8 commit 3bf130c
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ public final class Schema {
private transient final List<String> _dimensionNames = new ArrayList<>();
private transient final List<String> _metricNames = new ArrayList<>();
private transient final List<String> _dateTimeNames = new ArrayList<>();
private transient final Map<String, Integer> _columnIdMap = new HashMap<>();
private transient int _columnIdCount = 0;

@Nonnull
public static Schema fromFile(@Nonnull File schemaFile)
Expand Down Expand Up @@ -218,7 +216,6 @@ public void addField(@Nonnull FieldSpec fieldSpec) {
}

_fieldSpecMap.put(columnName, fieldSpec);
_columnIdMap.putIfAbsent(columnName, _columnIdCount++);
}

@Deprecated
Expand All @@ -229,7 +226,6 @@ public void addField(@Nonnull String columnName, @Nonnull FieldSpec fieldSpec) {

public boolean removeField(String columnName) {
FieldSpec existingFieldSpec = _fieldSpecMap.remove(columnName);
_columnIdMap.remove(columnName);

if (existingFieldSpec != null) {
FieldType fieldType = existingFieldSpec.getFieldType();
Expand Down Expand Up @@ -289,12 +285,6 @@ public Set<String> getPhysicalColumnNames() {
return physicalColumnNames;
}

@JsonIgnore
@Nonnull
public int getColumnId(String columnName) {
return _columnIdMap.get(columnName);
}

@JsonIgnore
@Nonnull
public Collection<FieldSpec> getAllFieldSpecs() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,15 @@
package org.apache.pinot.core.data.recordtransformer;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.ArrayList;
import java.util.Collection;
import java.util.stream.Collectors;
import java.util.Set;
import org.apache.pinot.common.data.FieldSpec;
import org.apache.pinot.common.data.FieldSpec.FieldType;
import org.apache.pinot.common.data.Schema;
import org.apache.pinot.common.utils.CommonConstants;
import org.apache.pinot.core.data.GenericRow;
import org.roaringbitmap.RoaringBitmap;


public class NullValueTransformer implements RecordTransformer {
Expand All @@ -51,16 +49,26 @@ public NullValueTransformer(Schema schema) {

@Override
public GenericRow transform(GenericRow record) {
// Try and reuse the null columns set
Set<String> nullColumnsSet = (Set<String>) record.getValue(CommonConstants.Segment.NULL_FIELDS);
if (nullColumnsSet != null) {
nullColumnsSet.clear();
}

for (Map.Entry<String, Object> entry : _defaultNullValues.entrySet()) {
String fieldName = entry.getKey();
Object value = record.getValue(fieldName);
if (value == null || (value instanceof Object[] && ((Object[]) value).length == 0) || (value instanceof List
&& ((List) value).isEmpty())) {
record.putDefaultNullValue(fieldName, entry.getValue());
if (nullColumnsSet == null) {
nullColumnsSet = new HashSet<>();
}
nullColumnsSet.add(fieldName);
}
}

record.putField(CommonConstants.Segment.NULL_FIELDS, nullColumnBitMap);
record.putValue(CommonConstants.Segment.NULL_FIELDS, nullColumnsSet);
return record;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,10 @@ private void addPresenceVector(GenericRow row, int docId) {
return;
}

RoaringBitmap nullColumnsBitMap = (RoaringBitmap) row.getValue(CommonConstants.Segment.NULL_FIELDS);
Set<String> nullColumnsSet = (Set<String>) row.getValue(CommonConstants.Segment.NULL_FIELDS);
for (FieldSpec fieldSpec : _schema.getAllFieldSpecs()) {
String columnName = fieldSpec.getName();
if (nullColumnsBitMap.contains(_schema.getColumnId(columnName))) {
if (nullColumnsSet.contains(columnName)) {
_presenceVectorMap.get(columnName).setNull(docId);
}
}
Expand Down Expand Up @@ -497,7 +497,11 @@ public List<StarTreeV2> getStarTrees() {
* @return Generic row with physical columns of the specified row.
*/
public GenericRow getRecord(int docId, GenericRow reuse) {
RoaringBitmap nullColumnBitMap = null;
// Try and reuse the null columns set
Set<String> nullColumnsSet = (Set<String>) reuse.getValue(CommonConstants.Segment.NULL_FIELDS);
if (nullColumnsSet != null) {
nullColumnsSet.clear();
}

for (FieldSpec fieldSpec : _physicalFieldSpecs) {
String column = fieldSpec.getName();
Expand All @@ -507,14 +511,14 @@ public GenericRow getRecord(int docId, GenericRow reuse) {

PresenceVectorReader reader = _presenceVectorMap.get(column);
if (reader != null && !reader.isPresent(docId)) {
if (nullColumnBitMap == null) {
nullColumnBitMap = new RoaringBitmap();
if (nullColumnsSet == null) {
nullColumnsSet = new HashSet<>();
}
nullColumnBitMap.add(_schema.getColumnId(column));
nullColumnsSet.add(column);
}
}

reuse.putField(CommonConstants.Segment.NULL_FIELDS, nullColumnBitMap);
reuse.putValue(CommonConstants.Segment.NULL_FIELDS, nullColumnsSet);
return reuse;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,7 @@ private boolean createDictionaryForColumn(ColumnIndexCreationInfo info, SegmentG
@Override
public void indexRow(GenericRow row) {
// Determine if we need to process presence vector per row, per column
RoaringBitmap nullColumnsBitMap = null;
if (row.getValue(CommonConstants.Segment.NULL_FIELDS) != null) {
nullColumnsBitMap = (RoaringBitmap) row.getValue(CommonConstants.Segment.NULL_FIELDS);
}
Set<String> nullColumnsSet = (Set<String>) row.getValue(CommonConstants.Segment.NULL_FIELDS);

for (String columnName : _forwardIndexCreatorMap.keySet()) {
Object columnValueToIndex = row.getValue(columnName);
Expand Down Expand Up @@ -295,7 +292,7 @@ public void indexRow(GenericRow row) {
}

// If row has null value for given column name, add to presence vector
if (null != nullColumnsBitMap && nullColumnsBitMap.contains(schema.getColumnId(columnName))) {
if (null != nullColumnsSet && nullColumnsSet.contains(columnName)) {
_presenceVectorCreatorMap.get(columnName).setNull(docIdCounter);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.pinot.core.indexsegment.mutable;

import java.util.Set;
import org.apache.pinot.common.data.Schema;
import org.apache.pinot.core.data.GenericRow;
import org.apache.pinot.core.data.readers.JSONRecordReader;
Expand Down Expand Up @@ -89,15 +90,16 @@ public void testGetRecord() {
GenericRow reuse = new GenericRow();
_mutableSegmentImpl.getRecord(0, reuse);
List<String> nullColumns = new ArrayList<>();
RoaringBitmap nullBitmap = (RoaringBitmap) reuse.getValue(NULL_FIELDS);
Set<String> nullColumnsSet = (Set<String>) reuse.getValue(NULL_FIELDS);
for (String colName : _schema.getColumnNames()) {
if (nullBitmap.contains(_schema.getColumnId(colName))) {
if (nullColumnsSet.contains(colName)) {
nullColumns.add(colName);
}
}
Assert.assertEquals(nullColumns, _finalNullColumns);

_mutableSegmentImpl.getRecord(1, reuse);
Assert.assertNull(reuse.getValue(NULL_FIELDS));
nullColumnsSet = (Set<String>) reuse.getValue(NULL_FIELDS);
Assert.assertTrue(nullColumnsSet.isEmpty());
}
}

0 comments on commit 3bf130c

Please sign in to comment.