Skip to content
Permalink
Browse files
Merge branch 'master' of git@github.com:DataSketches/sketches-pig.git
  • Loading branch information
leerho committed Mar 16, 2017
2 parents 0d4b54f + a713ac0 commit 5456fb2cd5647b0451fc58791bf50c3dd3a783d9
Showing 3 changed files with 17 additions and 4 deletions.
@@ -23,6 +23,7 @@

import com.yahoo.sketches.sampling.ReservoirItemsSketch;
import com.yahoo.sketches.sampling.ReservoirItemsUnion;
import com.yahoo.sketches.sampling.SamplingPigUtil;

/**
* This is a Pig UDF that applies reservoir sampling to input tuples. It implements both
@@ -121,8 +122,6 @@ public Schema outputSchema(final Schema input) {
recordSchema.add(new Schema.FieldSchema(N_ALIAS, DataType.LONG));
recordSchema.add(new Schema.FieldSchema(K_ALIAS, DataType.INTEGER));

//final Schema tupleSchema = new Schema();
//tupleSchema.add(new Schema.FieldSchema(SAMPLES_ALIAS))
// this should add a bag to the output
recordSchema.add(new Schema.FieldSchema(SAMPLES_ALIAS, source, DataType.BAG));

@@ -205,7 +204,7 @@ public Tuple exec(final Tuple inputTuple) throws IOException {
reservoir.update(t);
}
// newDefaultBag(List<Tuple>) does *not* copy values
final List<Tuple> data = reservoir.getRawSamplesAsList();
final List<Tuple> data = SamplingPigUtil.getRawSamplesAsList(reservoir);
outputBag = BagFactory.getInstance().newDefaultBag(data);
k = reservoir.getK();
}
@@ -15,6 +15,7 @@

import com.yahoo.sketches.sampling.ReservoirItemsSketch;
import com.yahoo.sketches.sampling.ReservoirItemsUnion;
import com.yahoo.sketches.sampling.SamplingPigUtil;

/**
* This is a Pig UDF that unions reservoir samples. It implements
@@ -82,8 +83,9 @@ public Tuple getValue() {
return null;
}

// newDefaultBag(List<Tuple>) does *not* copy values
final ReservoirItemsSketch<Tuple> resultSketch = union_.getResult();
final List<Tuple> data = resultSketch.getRawSamplesAsList();
final List<Tuple> data = SamplingPigUtil.getRawSamplesAsList(resultSketch);
final DataBag sampleBag = BagFactory.getInstance().newDefaultBag(data);

return ReservoirSampling.createResultTuple(resultSketch.getN(), resultSketch.getK(), sampleBag);
@@ -0,0 +1,12 @@
package com.yahoo.sketches.sampling;

import java.util.ArrayList;

/**
 * Static helper through which the Pig UDFs obtain the raw sample list of a
 * {@link ReservoirItemsSketch}.
 *
 * <p>NOTE(review): this class is declared in the sketch's own package
 * ({@code com.yahoo.sketches.sampling}), presumably so that it can reach an accessor that is not
 * public on {@link ReservoirItemsSketch} -- confirm against the sketch library.
 *
 * @author Jon Malkin
 */
public final class SamplingPigUtil {

  // Static-only utility class: a private constructor prevents pointless instantiation.
  private SamplingPigUtil() {
  }

  /**
   * Returns the result of {@code sketch.getRawSamplesAsList()} exactly as the sketch provides it;
   * this method performs no copying or filtering of its own.
   *
   * <p>NOTE(review): the name suggests the returned list is the sketch's raw (uncopied) sample
   * data, so callers should treat it as potentially shared with the sketch -- verify against
   * {@link ReservoirItemsSketch}.
   *
   * @param sketch the reservoir sketch to read samples from; must not be {@code null}, since it
   *     is dereferenced unconditionally
   * @param <T> the item type held by the sketch
   * @return the list returned by {@code sketch.getRawSamplesAsList()}
   */
  public static <T> ArrayList<T> getRawSamplesAsList(final ReservoirItemsSketch<T> sketch) {
    return sketch.getRawSamplesAsList();
  }
}

0 comments on commit 5456fb2

Please sign in to comment.