Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
131 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package com.yahoo.sketches.pig.sampling; | ||
|
||
import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.RECORD_ALIAS; | ||
import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.WEIGHT_ALIAS; | ||
import static com.yahoo.sketches.pig.sampling.VarOptSamplingTest.EPS; | ||
import static org.testng.Assert.assertEquals; | ||
import static org.testng.Assert.assertNull; | ||
import static org.testng.Assert.assertTrue; | ||
import static org.testng.Assert.fail; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
import org.apache.pig.data.DataBag; | ||
import org.apache.pig.data.DataByteArray; | ||
import org.apache.pig.data.DataType; | ||
import org.apache.pig.data.Tuple; | ||
import org.apache.pig.data.TupleFactory; | ||
import org.apache.pig.impl.logicalLayer.schema.Schema; | ||
import org.testng.annotations.Test; | ||
|
||
import com.yahoo.sketches.sampling.VarOptItemsSketch; | ||
|
||
public class GetVarOptSamplesTest { | ||
private static final ArrayOfTuplesSerDe serDe_ = new ArrayOfTuplesSerDe(); | ||
|
||
@Test | ||
public void checkDegenerateInput() { | ||
final GetVarOptSamples udf = new GetVarOptSamples(); | ||
|
||
try { | ||
assertNull(udf.exec(null)); | ||
assertNull(udf.exec(TupleFactory.getInstance().newTuple(0))); | ||
assertNull(udf.exec(TupleFactory.getInstance().newTuple((Object) null))); | ||
} catch (final IOException e) { | ||
fail("Unexpected IOException"); | ||
} | ||
} | ||
|
||
@Test | ||
public void checkExec() { | ||
final int k = 10; | ||
final int n = 25; // exact mode | ||
final GetVarOptSamples udf = new GetVarOptSamples(); | ||
|
||
try { | ||
final VarOptItemsSketch<Tuple> vis = VarOptItemsSketch.newInstance(k); | ||
double cumWt = 0.0; | ||
for (int i = 1; i <= n; ++i) { | ||
final Tuple t = TupleFactory.getInstance().newTuple(2); | ||
final double wt = 1.0 * i; | ||
t.set(0, wt); | ||
t.set(1, i); | ||
vis.update(t, wt); | ||
cumWt += wt; | ||
} | ||
|
||
final DataByteArray dba = new DataByteArray(vis.toByteArray(serDe_)); | ||
final Tuple inputTuple = TupleFactory.getInstance().newTuple(dba); | ||
final DataBag result = udf.exec(inputTuple); | ||
|
||
double cumResultWt = 0.0; | ||
for (Tuple sample : result) { | ||
cumResultWt += (double) sample.get(0); | ||
final Tuple record = (Tuple) sample.get(1); | ||
final int id = (int) record.get(1); | ||
assertTrue(id >= 1 && id <= n); | ||
} | ||
assertEquals(cumResultWt, cumWt, EPS); | ||
} catch (final IOException e) { | ||
fail("Unexpected IOException"); | ||
} | ||
} | ||
|
||
@Test | ||
public void validOutputSchemaTest() throws IOException { | ||
final GetVarOptSamples udf = new GetVarOptSamples(); | ||
|
||
final Schema serializedSketch = new Schema(); | ||
serializedSketch.add(new Schema.FieldSchema("field1", DataType.BYTEARRAY)); | ||
//final Schema inputSchema = new Schema(); | ||
//inputSchema.add(new Schema.FieldSchema("record", serializedSketch, DataType.TUPLE)); | ||
|
||
//final Schema output = udf.outputSchema(inputSchema); | ||
final Schema output = udf.outputSchema(serializedSketch); | ||
assertEquals(output.size(), 1); | ||
assertEquals(output.getField(0).type, DataType.BAG); | ||
|
||
final List<Schema.FieldSchema> outputFields = output.getField(0).schema.getFields(); | ||
assertEquals(outputFields.size(), 2); | ||
|
||
// check high-level structure | ||
assertEquals(outputFields.get(0).alias, WEIGHT_ALIAS); | ||
assertEquals(outputFields.get(0).type, DataType.DOUBLE); | ||
assertEquals(outputFields.get(1).alias, RECORD_ALIAS); | ||
assertEquals(outputFields.get(1).type, DataType.TUPLE); | ||
} | ||
|
||
@Test | ||
public void badOutputSchemaTest() throws IOException { | ||
final GetVarOptSamples udf = new GetVarOptSamples(); | ||
|
||
try { | ||
udf.outputSchema(null); | ||
} catch (final IllegalArgumentException e) { | ||
// expected | ||
} | ||
|
||
try { | ||
udf.outputSchema(new Schema()); | ||
} catch (final IllegalArgumentException e) { | ||
// expected | ||
} | ||
|
||
try { | ||
final Schema wrongSchema = new Schema(); | ||
wrongSchema.add(new Schema.FieldSchema("field", DataType.BOOLEAN)); | ||
udf.outputSchema(wrongSchema); | ||
} catch (final IllegalArgumentException e) { | ||
// expected | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters