Skip to content
Permalink
Browse files
Merge pull request #36 from DataSketches/reservoir_helper
add helper class to access package private method from sketches-core
  • Loading branch information
jmalkin committed Feb 15, 2017
2 parents 3b8f643 + c4493cc commit a713ac009808a2d48ef6d8f66f934bb905bc9d99
Showing 5 changed files with 21 additions and 8 deletions.
@@ -63,7 +63,7 @@ public DataToDoublesSketch(final int k) {
super();
unionBuilder_ = DoublesUnion.builder();
if (k > 0) {
unionBuilder_.setK(k);
unionBuilder_.setMaxK(k);
}
}

@@ -297,7 +297,7 @@ public IntermediateFinal(final String kStr) {
// Creates the union builder for this stage; k is forwarded to the builder only when positive,
// otherwise the builder is left exactly as DoublesUnion.builder() returned it.
public IntermediateFinal(final int k) {
unionBuilder_ = DoublesUnion.builder();
if (k > 0) {
// NOTE(review): this excerpt looks like a diff hunk with its +/- markers stripped, so setK is
// presumably the removed line and setMaxK its replacement -- confirm against the real file.
unionBuilder_.setK(k);
unionBuilder_.setMaxK(k);
}
}

@@ -64,7 +64,7 @@ public UnionDoublesSketch(final int k) {
super();
unionBuilder_ = DoublesUnion.builder();
if (k > 0) {
unionBuilder_.setK(k);
unionBuilder_.setMaxK(k);
}
}

@@ -304,7 +304,7 @@ public IntermediateFinal(final String kStr) {
*/
public IntermediateFinal(final int k) {
// Start from a fresh builder; k is applied below only when it is positive.
unionBuilder_ = DoublesUnion.builder();
// NOTE(review): the two guarded calls below look like the removed (setK) and added (setMaxK)
// sides of a diff hunk shown without +/- markers -- confirm which one the real file keeps.
if (k > 0) { unionBuilder_.setK(k); }
if (k > 0) { unionBuilder_.setMaxK(k); }
}

@Override // IntermediateFinal exec
@@ -23,6 +23,7 @@

import com.yahoo.sketches.sampling.ReservoirItemsSketch;
import com.yahoo.sketches.sampling.ReservoirItemsUnion;
import com.yahoo.sketches.sampling.SamplingPigUtil;

/**
* This is a Pig UDF that applies reservoir sampling to input tuples. It implements both
@@ -121,8 +122,6 @@ public Schema outputSchema(final Schema input) {
recordSchema.add(new Schema.FieldSchema(N_ALIAS, DataType.LONG));
recordSchema.add(new Schema.FieldSchema(K_ALIAS, DataType.INTEGER));

//final Schema tupleSchema = new Schema();
//tupleSchema.add(new Schema.FieldSchema(SAMPLES_ALIAS))
// this should add a bag to the output
recordSchema.add(new Schema.FieldSchema(SAMPLES_ALIAS, source, DataType.BAG));

@@ -205,7 +204,7 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
reservoir.update(t);
}
// newDefaultBag(List<Tuple>) does *not* copy values
final List<Tuple> data = reservoir.getRawSamplesAsList();
final List<Tuple> data = SamplingPigUtil.getRawSamplesAsList(reservoir);
outputBag = BagFactory.getInstance().newDefaultBag(data);
k = reservoir.getK();
}
@@ -15,6 +15,7 @@

import com.yahoo.sketches.sampling.ReservoirItemsSketch;
import com.yahoo.sketches.sampling.ReservoirItemsUnion;
import com.yahoo.sketches.sampling.SamplingPigUtil;

/**
* This is a Pig UDF that unions reservoir samples. It implements
@@ -82,8 +83,9 @@ public Tuple getValue() {
return null;
}

// newDefaultBag(List<Tuple>) does *not* copy values
final ReservoirItemsSketch<Tuple> resultSketch = union_.getResult();
final List<Tuple> data = resultSketch.getRawSamplesAsList();
final List<Tuple> data = SamplingPigUtil.getRawSamplesAsList(resultSketch);
final DataBag sampleBag = BagFactory.getInstance().newDefaultBag(data);

return ReservoirSampling.createResultTuple(resultSketch.getN(), resultSketch.getK(), sampleBag);
@@ -0,0 +1,12 @@
package com.yahoo.sketches.sampling;

import java.util.ArrayList;

/**
 * Static helper that lets code outside this package reach
 * {@code ReservoirItemsSketch.getRawSamplesAsList()}.
 *
 * <p>The class is public while living in the same package as {@link ReservoirItemsSketch};
 * presumably the wrapped method is package-private in sketches-core, which is why callers
 * cannot invoke it directly (confirm against the sketches-core sources).
 *
 * <p>This class holds no state and is not meant to be instantiated.
 *
 * @author Jon Malkin
 */
public final class SamplingPigUtil {
  /** Not instantiable: this class only hosts static helpers. */
  private SamplingPigUtil() {
    // intentionally empty
  }

  /**
   * Returns the list produced by the sketch's own {@code getRawSamplesAsList()}.
   *
   * <p>The result is handed back exactly as the sketch returned it; this method neither copies
   * nor wraps it. NOTE(review): whether that list is a copy or the sketch's live storage is
   * decided by sketches-core, so callers should confirm before mutating it.
   *
   * @param sketch the reservoir sketch to read from; must not be {@code null}
   * @param <T> the item type held by the sketch
   * @return whatever {@code sketch.getRawSamplesAsList()} returns
   */
  public static <T> ArrayList<T> getRawSamplesAsList(final ReservoirItemsSketch<T> sketch) {
    return sketch.getRawSamplesAsList();
  }
}

0 comments on commit a713ac0

Please sign in to comment.