-
Notifications
You must be signed in to change notification settings - Fork 3.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add stringLast and stringFirst aggregators extension #5789
Changes from 45 commits
a0049fd
09338b9
3658b0c
5975952
adc773b
c2f3672
1c3bd6a
c75c88f
d671922
6945bf9
7dd12e0
217627f
9b68a60
e552f04
48325f3
657e8a3
e8a2ded
4d24159
7eeef86
bbf84b8
b09217c
834662d
482e8bf
046c5a9
9829263
023fc88
e6dee78
1fb6789
42b6ca3
a9f2d61
5a1643f
a9a1069
1e53094
129d594
a5dea44
e7992fa
aa4ff30
1d11d46
33c944f
7ffebe9
acfbed8
ff56f3e
7ff6fc3
b074c5f
21dfdc1
19b55c3
f674212
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package io.druid.query.aggregation; | ||
|
||
import com.fasterxml.jackson.annotation.JsonCreator; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import io.druid.collections.SerializablePair; | ||
|
||
public class SerializablePairLongString extends SerializablePair<Long, String> | ||
{ | ||
@JsonCreator | ||
public SerializablePairLongString(@JsonProperty("lhs") Long lhs, @JsonProperty("rhs") String rhs) | ||
{ | ||
super(lhs, rhs); | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package io.druid.query.aggregation; | ||
|
||
import io.druid.data.input.InputRow; | ||
import io.druid.java.util.common.StringUtils; | ||
import io.druid.query.aggregation.first.StringFirstAggregatorFactory; | ||
import io.druid.segment.GenericColumnSerializer; | ||
import io.druid.segment.column.ColumnBuilder; | ||
import io.druid.segment.data.GenericIndexed; | ||
import io.druid.segment.data.ObjectStrategy; | ||
import io.druid.segment.serde.ComplexColumnPartSupplier; | ||
import io.druid.segment.serde.ComplexMetricExtractor; | ||
import io.druid.segment.serde.ComplexMetricSerde; | ||
import io.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer; | ||
import io.druid.segment.writeout.SegmentWriteOutMedium; | ||
|
||
import javax.annotation.Nullable; | ||
import java.nio.ByteBuffer; | ||
|
||
/** | ||
* The SerializablePairLongStringSerde serializes a Long-String pair (SerializablePairLongString). | ||
* The serialization structure is: Long:Integer:String | ||
* <p> | ||
* The class is used on first/last String aggregators to store the time and the first/last string. | ||
* Long:Integer:String -> Timestamp:StringSize:StringData | ||
*/ | ||
public class SerializablePairLongStringSerde extends ComplexMetricSerde | ||
{ | ||
|
||
private static final String TYPE_NAME = "serializablePairLongString"; | ||
|
||
@Override | ||
public String getTypeName() | ||
{ | ||
return TYPE_NAME; | ||
} | ||
|
||
@Override | ||
public ComplexMetricExtractor getExtractor() | ||
{ | ||
return new ComplexMetricExtractor() | ||
{ | ||
@Override | ||
public Class<SerializablePairLongString> extractedClass() | ||
{ | ||
return SerializablePairLongString.class; | ||
} | ||
|
||
@Override | ||
public Object extractValue(InputRow inputRow, String metricName) | ||
{ | ||
return inputRow.getRaw(metricName); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public void deserializeColumn(ByteBuffer buffer, ColumnBuilder columnBuilder) | ||
{ | ||
final GenericIndexed column = GenericIndexed.read(buffer, getObjectStrategy(), columnBuilder.getFileMapper()); | ||
columnBuilder.setComplexColumn(new ComplexColumnPartSupplier(getTypeName(), column)); | ||
} | ||
|
||
@Override | ||
public ObjectStrategy getObjectStrategy() | ||
{ | ||
return new ObjectStrategy<SerializablePairLongString>() | ||
{ | ||
@Override | ||
public int compare(@Nullable SerializablePairLongString o1, @Nullable SerializablePairLongString o2) | ||
{ | ||
return StringFirstAggregatorFactory.VALUE_COMPARATOR.compare(o1, o2); | ||
} | ||
|
||
@Override | ||
public Class<? extends SerializablePairLongString> getClazz() | ||
{ | ||
return SerializablePairLongString.class; | ||
} | ||
|
||
@Override | ||
public SerializablePairLongString fromByteBuffer(ByteBuffer buffer, int numBytes) | ||
{ | ||
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer(); | ||
|
||
long lhs = readOnlyBuffer.getLong(); | ||
int stringSize = readOnlyBuffer.getInt(); | ||
|
||
String lastString = null; | ||
if (stringSize > 0) { | ||
byte[] stringBytes = new byte[stringSize]; | ||
readOnlyBuffer.get(stringBytes, 0, stringSize); | ||
lastString = StringUtils.fromUtf8(stringBytes); | ||
} | ||
|
||
return new SerializablePairLongString(lhs, lastString); | ||
} | ||
|
||
@Override | ||
public byte[] toBytes(SerializablePairLongString val) | ||
{ | ||
String rhsString = val.rhs; | ||
ByteBuffer bbuf; | ||
|
||
if (rhsString != null) { | ||
byte[] rhsBytes = StringUtils.toUtf8(rhsString); | ||
bbuf = ByteBuffer.allocate(Long.BYTES + Integer.BYTES + rhsBytes.length); | ||
bbuf.putLong(val.lhs); | ||
bbuf.putInt(Long.BYTES, rhsBytes.length); | ||
bbuf.position(Long.BYTES + Integer.BYTES); | ||
bbuf.put(rhsBytes); | ||
} else { | ||
bbuf = ByteBuffer.allocate(Long.BYTES + Integer.BYTES); | ||
bbuf.putLong(val.lhs); | ||
bbuf.putInt(Long.BYTES, 0); | ||
} | ||
|
||
return bbuf.array(); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column) | ||
{ | ||
return LargeColumnSupportedComplexColumnSerializer.create(segmentWriteOutMedium, column, this.getObjectStrategy()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package io.druid.query.aggregation.first; | ||
|
||
import io.druid.query.aggregation.ObjectAggregateCombiner; | ||
import io.druid.segment.ColumnValueSelector; | ||
|
||
import javax.annotation.Nullable; | ||
|
||
public class StringFirstAggregateCombiner extends ObjectAggregateCombiner<String> | ||
{ | ||
private String firstString; | ||
|
||
@Override | ||
public void reset(ColumnValueSelector selector) | ||
{ | ||
firstString = (String) selector.getObject(); | ||
} | ||
|
||
@Override | ||
public void fold(ColumnValueSelector selector) | ||
{ | ||
if (firstString == null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks that this is to check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah! It is true, good point! |
||
firstString = (String) selector.getObject(); | ||
} | ||
} | ||
|
||
@Nullable | ||
@Override | ||
public String getObject() | ||
{ | ||
return firstString; | ||
} | ||
|
||
@Override | ||
public Class<String> classOfObject() | ||
{ | ||
return String.class; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please check #5789 (comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I add some unit tests:
https://github.com/wizzie-io/druid/blob/9829263e7344b551904ec9eba5aab732d121e311/processing/src/test/java/io/druid/query/aggregation/first/StringFirstAggregationTest.java#L151
https://github.com/wizzie-io/druid/blob/9829263e7344b551904ec9eba5aab732d121e311/processing/src/test/java/io/druid/query/aggregation/last/StringLastAggregationTest.java#L151