Skip to content

Commit

Permalink
Merge pull request #56 from apache/Update_to_core2.0.0
Browse files Browse the repository at this point in the history
Updates required to match java core 2.0.0.
  • Loading branch information
leerho committed Mar 3, 2021
2 parents 880d5cf + 29292bd commit a9e73d4
Show file tree
Hide file tree
Showing 79 changed files with 579 additions and 540 deletions.
55 changes: 42 additions & 13 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ under the License.

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

<modelVersion>4.0.0</modelVersion>

<parent>
Expand All @@ -30,17 +31,14 @@ under the License.
</parent>

<groupId>org.apache.datasketches</groupId>

<!-- UNIQUE FOR THIS JAVA COMPONENT -->
<artifactId>datasketches-hive</artifactId>
<version>1.2.0-SNAPSHOT</version>
<packaging>jar</packaging>

<name>${project.artifactId}</name>
<description>Apache Hive adaptors for the DataSketches library.</description>
<!-- END: UNIQUE FOR THIS JAVA COMPONENT -->

<url>https://datasketches.apache.org/</url>
<name>${project.artifactId}</name>
<inceptionYear>2015</inceptionYear>
<packaging>jar</packaging> <!-- jar is the default -->

<mailingLists>
<mailingList>
Expand Down Expand Up @@ -85,7 +83,7 @@ under the License.

<properties>
<!-- UNIQUE FOR THIS JAVA COMPONENT -->
<datasketches-java.version>1.3.0-incubating</datasketches-java.version>
<datasketches-java.version>2.0.0</datasketches-java.version>
<hive-exec.version>2.3.4</hive-exec.version>
<hadoop-common.version>2.8.5</hadoop-common.version>
<slf4j-simple.version>1.7.30</slf4j-simple.version>
Expand All @@ -95,7 +93,7 @@ under the License.
<testng.version>7.1.0</testng.version>

<!-- System-wide properties -->
<maven.version>3.0.0</maven.version>
<maven.version>3.5.0</maven.version>
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
Expand All @@ -105,8 +103,11 @@ under the License.
<project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding>
<project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding>

<!-- org.codehaus.plexus used for strict profile testing-->
<plexus-compiler-javac-errorprone.version>2.8.8</plexus-compiler-javac-errorprone.version>
<!-- org.codehaus plugins -->
<!-- used for strict profile testing-->
<plexus-compiler-javac-errorprone.version>2.8.5</plexus-compiler-javac-errorprone.version>
<versions-maven-plugin.version>2.8.1</versions-maven-plugin.version>

<!-- Maven Plugins -->
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!-- overrides parent -->
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- overrides parent -->
Expand All @@ -124,6 +125,7 @@ under the License.
<!-- org.jacoco Maven Plugins -->
<jacoco-maven-plugin.version>0.8.6</jacoco-maven-plugin.version>
<!-- org.eluder Maven Plugins -->
<coveralls-repo-token></coveralls-repo-token>
<coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
<!-- other -->
<lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
Expand Down Expand Up @@ -209,6 +211,13 @@ under the License.
<build>
<pluginManagement>
<plugins>

<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>versions-maven-plugin</artifactId>
<version>${versions-maven-plugin.version}</version>
</plugin>

<plugin>
<!-- We want to deploy the artifacts to a staging location for perusal -->
<!-- Apache Parent pom: apache-release profile -->
Expand All @@ -220,12 +229,14 @@ under the License.
<!-- see maven-install-plugin -->
</configuration>
</plugin>

<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>${maven-release-plugin.version}</version>
</plugin>

<plugin>
<!-- Extends Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
Expand All @@ -234,18 +245,21 @@ under the License.
<executions>
<execution>
<id>default-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
<execution>
<id>default-test-jar</id>
<phase>package</phase>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>

<plugin>
<!-- Extends Apache Parent pom, apache-release profile -->
<groupId>org.apache.maven.plugins</groupId>
Expand All @@ -263,6 +277,7 @@ under the License.
</execution>
</executions>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
Expand Down Expand Up @@ -293,6 +308,7 @@ under the License.
</execution>
</executions>
</plugin>

<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.rat</groupId>
Expand All @@ -312,13 +328,16 @@ under the License.
<useDefaultExcludes>true</useDefaultExcludes>
<excludes>
<!-- rat uses .gitignore for excludes by default -->
<exclude>**/*.yaml</exclude>
<exclude>**/*.yml</exclude>
<exclude>**/.*</exclude>
<exclude>**/test/resources/**/*.txt</exclude>
<exclude>.asf.yaml</exclude>
<exclude>LICENSE</exclude>
<exclude>NOTICE</exclude>
</excludes>
</configuration>
</plugin>

<plugin>
<!-- Extends Apache Parent pom, apache-release profile -->
<groupId>org.apache.maven.plugins</groupId>
Expand All @@ -341,6 +360,7 @@ under the License.
</execution>
</executions>
</plugin>

<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
Expand All @@ -352,29 +372,38 @@ under the License.
<redirectTestOutputToFile>true</redirectTestOutputToFile>
</configuration>
</plugin>

<plugin>
<!-- Generates code coverage report from website. -->
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>${jacoco-maven-plugin.version}</version>
<executions>
<execution>
<id>prepare-agent</id>
<id>default-prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>default-report</id>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>

<plugin>
<!-- Submit code coverage report to Coveralls.io. -->
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>${coveralls-maven-plugin.version}</version>
<configuration>
<!-- Since we use Travis CI we do not have to put a Coveralls token here. -->
<repoToken>${coveralls-repo-token}</repoToken>
</configuration>
</plugin>

</plugins>
</pluginManagement>
<plugins>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

/**
* Hive UDAF to create an HllSketch from raw data.
* Hive UDAF to create a CpcSketch from raw data.
*
* <p><b>Note</b> Strings as raw data values are encoded as a UTF-16 VARCHAR
* prior to being submitted to the sketch. If the user requires a different
* encoding for cross-platform compatibility, it is recommended that these values be encoded prior
* to being submitted and then typed as a BINARY byte[].</p>
*/
@Description(
name = "dataToSketch",
Expand All @@ -51,7 +56,7 @@
+ "> SELECT dataToSketch(val, 12) FROM src;\n"
+ "The return value is a binary blob that can be operated on by other sketch related functions."
+ " The lgK parameter controls the sketch size and relative error expected from the sketch."
+ " It is optional an must be from 4 to 26. The default is 11, which is expected to yield errors"
+ " It is optional and must be from 4 to 26. The default is 11, which is expected to yield errors"
+ " of roughly +-1.5% in the estimation of uniques with 95% confidence."
+ " The seed parameter is optional")
public class DataToSketchUDAF extends AbstractGenericUDAFResolver {
Expand Down Expand Up @@ -119,7 +124,7 @@ public static class DataToSketchEvaluator extends SketchEvaluator {
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
// Different State is used for the iterate phase and the merge phase.
// SketchState is more space-efficient, so let's use SketchState if possible.
if ((mode_ == Mode.PARTIAL1) || (mode_ == Mode.COMPLETE)) { // iterate() will be used
if (this.mode_ == Mode.PARTIAL1 || this.mode_ == Mode.COMPLETE) { // iterate() will be used
return new SketchState();
}
return new UnionState();
Expand All @@ -136,22 +141,22 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
mode_ = mode;
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
this.mode_ = mode;
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
inputInspector_ = (PrimitiveObjectInspector) parameters[0];
this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
this.lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
seedInspector_ = (PrimitiveObjectInspector) parameters[2];
this.seedInspector_ = (PrimitiveObjectInspector) parameters[2];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
intermediateInspector_ = (StructObjectInspector) parameters[0];
this.intermediateInspector_ = (StructObjectInspector) parameters[0];
}

if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the lgK and the target HLL type
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(LG_K_FIELD, SEED_FIELD, SKETCH_FIELD),
Expand All @@ -176,24 +181,25 @@ public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters)
* java.lang.Object[])
*/
@Override
public void iterate(final @SuppressWarnings("deprecation") AggregationBuffer agg,
@SuppressWarnings("deprecation")
public void iterate(final AggregationBuffer agg,
final Object[] parameters) throws HiveException {
if (parameters[0] == null) { return; }
final SketchState state = (SketchState) agg;
if (!state.isInitialized()) {
initializeState(state, parameters);
}
state.update(parameters[0], inputInspector_);
state.update(parameters[0], this.inputInspector_);
}

private void initializeState(final State state, final Object[] parameters) {
int lgK = DEFAULT_LG_K;
if (lgKInspector_ != null) {
lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], lgKInspector_);
if (this.lgKInspector_ != null) {
lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.lgKInspector_);
}
long seed = DEFAULT_UPDATE_SEED;
if (seedInspector_ != null) {
seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], seedInspector_);
if (this.seedInspector_ != null) {
seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], this.seedInspector_);
}
state.init(lgK, seed);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,24 @@ public Object terminatePartial(final @SuppressWarnings("deprecation") Aggregatio
}

@Override
public void merge(final @SuppressWarnings("deprecation") AggregationBuffer buf, final Object data)
@SuppressWarnings("deprecation")
public void merge(final AggregationBuffer buf, final Object data)
throws HiveException {
if (data == null) { return; }
final UnionState state = (UnionState) buf;
if (!state.isInitialized()) {
initializeState(state, data);
}
final BytesWritable serializedSketch = (BytesWritable) intermediateInspector_.getStructFieldData(
data, intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
final BytesWritable serializedSketch = (BytesWritable) this.intermediateInspector_.getStructFieldData(
data, this.intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
state.update(CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch), state.getSeed()));
}

private void initializeState(final UnionState state, final Object data) {
final int lgK = ((IntWritable) intermediateInspector_.getStructFieldData(
data, intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
final long seed = ((LongWritable) intermediateInspector_.getStructFieldData(
data, intermediateInspector_.getStructFieldRef(SEED_FIELD))).get();
final int lgK = ((IntWritable) this.intermediateInspector_.getStructFieldData(
data, this.intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
final long seed = ((LongWritable) this.intermediateInspector_.getStructFieldData(
data, this.intermediateInspector_.getStructFieldRef(SEED_FIELD))).get();
state.init(lgK, seed);
}

Expand Down

0 comments on commit a9e73d4

Please sign in to comment.