Skip to content
Permalink
Browse files
Merge pull request #64 from apache/core_3_0_0
use the latest datasketches-java-3.0.0
  • Loading branch information
AlexanderSaydakov committed Jan 13, 2022
2 parents d4cb0c4 + 3576097 commit 117bacb52da8b2a69071478a32090c860cf0d72a
Showing 31 changed files with 92 additions and 121 deletions.
2 NOTICE
@@ -1,5 +1,5 @@
Apache DataSketches Pig
Copyright 2020 - The Apache Software Foundation
Copyright 2022 - The Apache Software Foundation

Copyright 2015-2018 Yahoo
Copyright 2019 Verizon Media
34 pom.xml
@@ -51,11 +51,11 @@ under the License.
<archive>https://mail-archives.apache.org/mod_mbox/datasketches-dev</archive>
</mailingList>
<mailingList>
<name>sketches-user</name>
<archive>https://groups.google.com/forum/#!forum/sketches-user</archive>
<subscribe>mailto:sketches-user%2Bsubscribe@googlegroups.com</subscribe>
<unsubscribe>mailto:sketches-user%2Bunsubscribe@googlegroups.com</unsubscribe>
<post>mailto:sketches-user@googlegroups.com</post>
<name>DataSketches Users</name>
<subscribe>user-subscribe@datasketches.apache.org</subscribe>
<unsubscribe>user-unsubscribe@datasketches.apache.org</unsubscribe>
<post>user@datasketches.apache.org</post>
<archive>https://mail-archives.apache.org/mod_mbox/datasketches-user</archive>
</mailingList>
</mailingLists>

@@ -85,17 +85,17 @@ under the License.

<properties>
<!-- UNIQUE FOR THIS JAVA COMPONENT -->
<datasketches-java.version>1.3.0-incubating</datasketches-java.version>
<datasketches-java.version>3.0.0</datasketches-java.version>
<pig.version>0.17.0</pig.version>
<hadoop-common.version>2.8.5</hadoop-common.version>
<commons-math3.version>3.6.1</commons-math3.version>
<!-- END:UNIQUE FOR THIS JAVA COMPONENT -->

<!-- Test -->
<testng.version>7.1.0</testng.version>
<testng.version>7.4.0</testng.version>

<!-- System-wide properties -->
<maven.version>3.0.0</maven.version>
<maven.version>3.5.0</maven.version>
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
@@ -112,22 +112,22 @@ under the License.
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- overrides parent -->
<maven-deploy-plugin.version>3.0.0-M1</maven-deploy-plugin.version> <!-- overrides parent -->
<maven-enforcer-plugin.version>3.0.0-M2</maven-enforcer-plugin.version> <!-- overrides parent -->
<maven-gpg-plugin.version>1.6</maven-gpg-plugin.version> <!-- overrides parent -->
<maven-gpg-plugin.version>3.0.1</maven-gpg-plugin.version> <!-- overrides parent -->
<maven-jar-plugin.version>3.2.0</maven-jar-plugin.version> <!-- overrides parent -->
<maven-javadoc-plugin.version>3.2.0</maven-javadoc-plugin.version> <!-- overrides parent -->
<maven-release-plugin.version>3.0.0-M1</maven-release-plugin.version> <!-- overrides parent -->
<maven-javadoc-plugin.version>3.3.1</maven-javadoc-plugin.version> <!-- overrides parent -->
<maven-release-plugin.version>3.0.0-M4</maven-release-plugin.version> <!-- overrides parent -->
<maven-remote-resources-plugin.version>[1.7.0,)</maven-remote-resources-plugin.version> <!-- overrides parent -->
<maven-source-plugin.version>3.2.1</maven-source-plugin.version> <!-- overrides parent -->
<maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version> <!-- overrides parent -->
<!-- Apache Plugins -->
<apache-rat-plugin.version>0.13</apache-rat-plugin.version> <!-- overrides parent -->
<!-- org.jacoco Maven Plugins -->
<jacoco-maven-plugin.version>0.8.6</jacoco-maven-plugin.version>
<jacoco-maven-plugin.version>0.8.7</jacoco-maven-plugin.version>
<!-- org.eluder Maven Plugins -->
<coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
<!-- other -->
<lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
<git-commit-id-plugin.version>3.0.0</git-commit-id-plugin.version>
<git-commit-id-maven-plugin.version>5.0.0</git-commit-id-maven-plugin.version>
</properties>

<repositories>
@@ -502,9 +502,9 @@ under the License.
<pluginManagement>
<plugins>
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>${git-commit-id-plugin.version}</version>
<groupId>io.github.git-commit-id</groupId>
<artifactId>git-commit-id-maven-plugin</artifactId>
<version>${git-commit-id-maven-plugin.version}</version>
<executions>
<execution>
<goals>
@@ -612,7 +612,7 @@ under the License.
<plugins>
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<artifactId>git-commit-id-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -134,8 +134,7 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
}

final AnotB aNOTb = SetOperation.builder().setSeed(seed_).buildANotB();
aNOTb.update(sketchA, sketchB);
final CompactSketch compactSketch = aNOTb.getResult(true, null);
final CompactSketch compactSketch = aNOTb.aNotB(sketchA, sketchB);
return compactOrderedSketchToTuple(compactSketch);
}

@@ -544,7 +544,7 @@ else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
// due to system bagged outputs from multiple mapper Intermediate functions.
// Each dataTuple.DBA:sketch will be merged into the union.
final DataByteArray dba = ((DataByteArray) f0);
union.update(Memory.wrap(dba.get()));
union.union(Memory.wrap(dba.get()));

}
else { // we should never get here.
@@ -254,7 +254,7 @@ private static void updateIntersection(final DataBag bag, final Intersection int
final DataByteArray dba = (DataByteArray) f0;
final Memory srcMem = Memory.wrap(dba.get());
final Sketch sketch = Sketch.wrap(srcMem, seed);
intersection.update(sketch);
intersection.intersect(sketch);
}
else {
throw new IllegalArgumentException(
@@ -380,7 +380,7 @@ else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
final DataByteArray dba = (DataByteArray) f0;
final Memory srcMem = Memory.wrap(dba.get());
final Sketch sketch = Sketch.wrap(srcMem, mySeed_);
intersection.update(sketch);
intersection.intersect(sketch);
}
else { // we should never get here.
throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
@@ -308,7 +308,7 @@ private static void updateUnion(final DataBag bag, final org.apache.datasketches
if (type == DataType.BYTEARRAY) {
final DataByteArray dba = (DataByteArray) f0;
if (dba.size() > 0) {
union.update(Memory.wrap(dba.get()));
union.union(Memory.wrap(dba.get()));
}
} else {
throw new IllegalArgumentException("Field type was not DataType.BYTEARRAY: " + type);
@@ -503,7 +503,7 @@ else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
// Each dataTuple.DBA:sketch will be merged into the union.
final DataByteArray dba = (DataByteArray) f0;
final Memory srcMem = Memory.wrap(dba.get());
union.update(srcMem);
union.union(srcMem);

}
else { // we should never get here.
@@ -20,8 +20,8 @@
package org.apache.datasketches.pig.tuple;

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;

class ArrayOfDoublesSketchStats {

@@ -23,8 +23,8 @@
import java.util.Arrays;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -22,9 +22,9 @@
import java.io.IOException;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -23,8 +23,8 @@

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -22,8 +22,8 @@
import java.io.IOException;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -25,9 +25,9 @@
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.DoublesSketchBuilder;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -23,8 +23,8 @@

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -24,8 +24,8 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.commons.math3.stat.inference.TTest;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
@@ -24,11 +24,11 @@
import java.io.IOException;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
@@ -92,13 +92,13 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(sketchSize_)
.setSamplingProbability(samplingProbability_).setNumberOfValues(numValues_).build();
DataToArrayOfDoublesSketchBase.updateSketch((DataBag) item, sketch, numValues_);
union.update(sketch);
union.union(sketch);
} else if (item instanceof DataByteArray) {
// This is a sketch from a prior call to the
// Intermediate function. Merge it with the
// current sketch.
final DataByteArray dba = (DataByteArray) item;
union.update(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
union.union(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
} else {
// we should never get here.
throw new IllegalArgumentException("InputTuple.Field0: Bag contains unrecognized types: "
@@ -23,8 +23,8 @@

import java.io.IOException;

import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -124,13 +124,13 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
// just insert each item of the tuple into the sketch
final UpdatableSketch<U, S> sketch = sketchBuilder_.build();
DataToSketch.updateSketch((DataBag) item, sketch);
union.update(sketch);
union.union(sketch);
} else if (item instanceof DataByteArray) {
// This is a sketch from a prior call to the
// Intermediate function. Merge it with the
// current sketch.
final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_);
union.update(incomingSketch);
union.union(incomingSketch);
} else {
// we should never get here.
throw new IllegalArgumentException(
@@ -24,9 +24,9 @@
import java.io.IOException;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
@@ -81,13 +81,13 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
// this is from a prior call to the initial function, so there is a nested bag.
for (final Tuple innerTuple: (DataBag) item) {
final DataByteArray dba = (DataByteArray) innerTuple.get(0);
union.update(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
union.union(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
}
} else if (item instanceof DataByteArray) {
// This is a sketch from a call to the Intermediate function
// Add it to the current union
final DataByteArray dba = (DataByteArray) item;
union.update(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
union.union(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
} else {
// we should never get here.
throw new IllegalArgumentException("InputTuple.Field0: Bag contains unrecognized types: "
@@ -24,10 +24,10 @@
import java.io.IOException;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -116,7 +116,7 @@ private static void updateUnion(final DataBag bag, final ArrayOfDoublesUnion uni
continue;
}
final DataByteArray dba = (DataByteArray) innerTuple.get(0);
union.update(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
union.union(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get())));
}
}

@@ -128,7 +128,7 @@ private static <S extends Summary> void updateUnion(final DataBag bag, final Uni
continue;
}
final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer);
union.update(incomingSketch);
union.union(incomingSketch);
}
}

@@ -94,13 +94,13 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
// this is from a prior call to the initial function, so there is a nested bag.
for (Tuple innerTuple: (DataBag) item) {
final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer_);
union.update(incomingSketch);
union.union(incomingSketch);
}
} else if (item instanceof DataByteArray) {
// This is a sketch from a call to the Intermediate function
// Add it to the current union.
final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_);
union.update(incomingSketch);
union.union(incomingSketch);
} else {
// we should never get here.
throw new IllegalArgumentException(

0 comments on commit 117bacb

Please sign in to comment.