Skip to content

Commit

Permalink
Beef up Pig integration testing
Browse files Browse the repository at this point in the history
relates to #200
relates to #117
relates to #199
  • Loading branch information
costin committed Apr 29, 2014
1 parent 404ecb6 commit bbb8d73
Show file tree
Hide file tree
Showing 11 changed files with 278 additions and 102 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -14,3 +14,4 @@ metastore_db
/hdfs/data
/repository-hdfs/data/*
/mr/src/main/resources/esh-build.properties
/pig/tmp-pig/*
3 changes: 2 additions & 1 deletion mr/src/test/resources/log4j.properties
Expand Up @@ -27,5 +27,6 @@ log4j.category.cascading=INFO
# Integration testing
log4j.category.org.elasticsearch.hadoop.integration.hive=INFO

# Hadoop
# Pig
#log4j.category.org.apache.pig=WARN

Expand Up @@ -21,6 +21,11 @@
import org.elasticsearch.hadoop.Provisioner;
import org.junit.Test;

import static org.junit.Assert.*;

import static org.hamcrest.CoreMatchers.*;


public class AbstractPigExtraTests extends AbstractPigTests {

@Test
Expand All @@ -36,8 +41,22 @@ public void testJoin() throws Exception {
"ES_PARENT = LOAD 'pig-test/parent' using org.elasticsearch.hadoop.pig.EsStorage() as (parent_name: chararray, parent_value: chararray);" +
"ES_CHILD = LOAD 'pig-test/child' using org.elasticsearch.hadoop.pig.EsStorage() as (child_name: chararray, parent_name: chararray, child_value: long);" +
"CO_GROUP = COGROUP ES_PARENT by parent_name, ES_CHILD by parent_name;" +
"DUMP CO_GROUP;";
"PARENT_CHILD = JOIN ES_PARENT by parent_name, ES_CHILD by parent_name;" +
"STORE PARENT_CHILD INTO 'tmp-pig/testjoin-join';" +
"STORE CO_GROUP INTO 'tmp-pig/testjoin-cogroup';";
pig.executeScript(script);
pig.executeScript(script2);

String join = getResults("tmp-pig/testjoin-join");
assertThat(join, containsString(tabify("parent1", "name1", "child1", "parent1", "100")));
assertThat(join, containsString(tabify("parent1", "name1", "child2", "parent1", "200")));
assertThat(join, containsString(tabify("parent2", "name2", "child3", "parent2", "300")));

String cogroup = getResults("tmp-pig/testjoin-cogroup");
assertThat(cogroup, containsString(tabify("parent1", "{(parent1,name1)}")));
// bags are not ordered so check each tuple individually
assertThat(cogroup, containsString("(child2,parent1,200)"));
assertThat(cogroup, containsString("(child1,parent1,100)"));
assertThat(cogroup, containsString(tabify("parent2", "{(parent2,name2)}", "{(child3,parent2,300)}")));
}
}
Expand Up @@ -31,9 +31,16 @@
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import static org.junit.Assert.*;

import static org.hamcrest.Matchers.*;

@RunWith(Parameterized.class)
public class AbstractPigSearchJsonTest extends AbstractPigTests {

private static int testInstance = 0;
private static String previousQuery;

@Parameters
public static Collection<Object[]> queries() {
return QueryTestParams.params();
Expand All @@ -43,6 +50,10 @@ public static Collection<Object[]> queries() {

public AbstractPigSearchJsonTest(String query) {
this.query = query;
if (!query.equals(previousQuery)) {
previousQuery = query;
testInstance++;
}
}

@Before
Expand All @@ -55,19 +66,20 @@ public void before() throws Exception {
public void testNestedField() throws Exception {
String data = "{ \"data\" : { \"map\" : { \"key\" : 10 } } }";
RestUtils.putData("json-pig/nestedmap", StringUtils.toUTF(data));

RestUtils.refresh("json-pig");

String script =
"REGISTER "+ Provisioner.ESHADOOP_TESTING_JAR + ";" +
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.mapping.names=nested:data.map.key');" +
//"A = LOAD 'json-pig/nestedmap' USING EsStorage() AS (nested:tuple(key:int));" +
"A = LOAD 'json-pig/nestedmap' USING EsStorage() AS (nested:chararray);" +
"DESCRIBE A;" +
"X = LIMIT A 3;" +
"DUMP X;";
"B = ORDER A BY nested DESC;" +
"X = LIMIT B 3;" +
"STORE A INTO '" + tmpPig() + "/testnestedfield';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testnestedfield");

// script =
// "REGISTER "+ Provisioner.ESHADOOP_TESTING_JAR + ";" +
// "DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "','es.mapping.names=nested:data.map');" +
Expand All @@ -88,8 +100,14 @@ public void testTuple() throws Exception {
"A = LOAD 'json-pig/tupleartists' USING EsStorage();" +
"X = LIMIT A 3;" +
//"DESCRIBE A;";
"DUMP X;";
"STORE A INTO '" + tmpPig() + "/testtuple';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testtuple");

assertThat(results, containsString(tabify("12", "Behemoth", "http://www.last.fm/music/Behemoth", "http://userserve-ak.last.fm/serve/252/54196161.jpg", "2011-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("918", "Megadeth", "http://www.last.fm/music/Megadeth","http://userserve-ak.last.fm/serve/252/8129787.jpg", "2871-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("982", "Foo Fighters", "http://www.last.fm/music/Foo+Fighters","http://userserve-ak.last.fm/serve/252/59495563.jpg", "2933-10-06T22:20:25.000+03:00")));
}

@Test
Expand All @@ -98,10 +116,15 @@ public void testTupleWithSchema() throws Exception {
"REGISTER "+ Provisioner.ESHADOOP_TESTING_JAR + ";" +
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "');" +
"A = LOAD 'json-pig/tupleartists' USING EsStorage() AS (name:chararray);" +
//"DESCRIBE A;" +
"X = LIMIT A 3;" +
"DUMP X;";
"B = ORDER A BY name DESC;" +
"X = LIMIT B 3;" +
"STORE B INTO '" + tmpPig() + "/testtupleschema';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testtupleschema");
assertThat(results, containsString("999"));
assertThat(results, containsString("12"));
assertThat(results, containsString("356"));
}

@Test
Expand All @@ -111,8 +134,14 @@ public void testFieldAlias() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query="+ query + "');"
+ "A = LOAD 'json-pig/fieldalias' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testfieldalias';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testfieldalias");

assertThat(results, containsString(tabify("12", "Behemoth", "http://www.last.fm/music/Behemoth", "http://userserve-ak.last.fm/serve/252/54196161.jpg", "2011-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("918", "Megadeth", "http://www.last.fm/music/Megadeth","http://userserve-ak.last.fm/serve/252/8129787.jpg", "2871-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("982", "Foo Fighters", "http://www.last.fm/music/Foo+Fighters","http://userserve-ak.last.fm/serve/252/59495563.jpg", "2933-10-06T22:20:25.000+03:00")));
}

@Test
Expand All @@ -122,8 +151,11 @@ public void testMissingIndex() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.index.read.missing.as.empty=true','es.query=" + query + "');"
+ "A = LOAD 'foo/bar' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testmissingindex';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testmissingindex");
assertThat(results.length(), is(0));
}

@Test
Expand All @@ -133,8 +165,14 @@ public void testParentChild() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.index.read.missing.as.empty=true','es.query=" + query + "');"
+ "A = LOAD 'json-pig/child' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testparentchild';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testparentchild");

assertThat(results, containsString(tabify("12", "Behemoth", "http://www.last.fm/music/Behemoth", "http://userserve-ak.last.fm/serve/252/54196161.jpg", "2011-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("918", "Megadeth", "http://www.last.fm/music/Megadeth","http://userserve-ak.last.fm/serve/252/8129787.jpg", "2871-10-06T22:20:25.000+03:00")));
assertThat(results, containsString(tabify("982", "Foo Fighters", "http://www.last.fm/music/Foo+Fighters","http://userserve-ak.last.fm/serve/252/59495563.jpg", "2933-10-06T22:20:25.000+03:00")));
}

@Test
Expand All @@ -150,4 +188,8 @@ public void testDynamicPatternFormat() throws Exception {
Assert.assertTrue(RestUtils.exists("json-pig/pattern-format-2500-10-06"));
Assert.assertTrue(RestUtils.exists("json-pig/pattern-format-2853-10-06"));
}

private static String tmpPig() {
return "tmp-pig/json-search-" + testInstance;
}
}
Expand Up @@ -30,9 +30,17 @@
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import static org.junit.Assert.*;

import static org.hamcrest.Matchers.*;


@RunWith(Parameterized.class)
public class AbstractPigSearchTest extends AbstractPigTests {

private static int testInstance = 0;
private static String previousQuery;

@Parameters
public static Collection<Object[]> queries() {
return QueryTestParams.params();
Expand All @@ -42,6 +50,10 @@ public static Collection<Object[]> queries() {

public AbstractPigSearchTest(String query) {
this.query = query;
if (!query.equals(previousQuery)) {
previousQuery = query;
testInstance++;
}
}

@Before
Expand All @@ -57,8 +69,12 @@ public void testTuple() throws Exception {
"A = LOAD 'pig/tupleartists' USING EsStorage();" +
"X = LIMIT A 3;" +
//"DESCRIBE A;";
"DUMP X;";
"STORE A INTO '" + tmpPig() + "/testtuple';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testtuple");
assertThat(results, containsString(tabify("Behemoth", "(http://www.last.fm/music/Behemoth,http://userserve-ak.last.fm/serve/252/54196161.jpg)")));
assertThat(results, containsString(tabify("Megadeth", "(http://www.last.fm/music/Megadeth,http://userserve-ak.last.fm/serve/252/8129787.jpg)")));
assertThat(results, containsString(tabify("Foo Fighters", "(http://www.last.fm/music/Foo+Fighters,http://userserve-ak.last.fm/serve/252/59495563.jpg)")));
}

@Test
Expand All @@ -67,10 +83,13 @@ public void testTupleWithSchema() throws Exception {
"REGISTER "+ Provisioner.ESHADOOP_TESTING_JAR + ";" +
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "');" +
"A = LOAD 'pig/tupleartists' USING EsStorage() AS (name:chararray);" +
//"DESCRIBE A;" +
"X = LIMIT A 3;" +
"DUMP X;";
"STORE A INTO '" + tmpPig() + "/testtupleschema';";
pig.executeScript(script);

String results = getResults("" + tmpPig() + "/testtupleschema");
assertThat(results, containsString("Behemoth"));
assertThat(results, containsString("Megadeth"));
}

@Test
Expand All @@ -80,8 +99,13 @@ public void testBag() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "');"
+ "A = LOAD 'pig/bagartists' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testbag';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testbag");

assertThat(results, containsString(tabify("Behemoth", "((http://www.last.fm/music/Behemoth),(http://userserve-ak.last.fm/serve/252/54196161.jpg))")));
assertThat(results, containsString(tabify("Megadeth", "((http://www.last.fm/music/Megadeth),(http://userserve-ak.last.fm/serve/252/8129787.jpg))")));
assertThat(results, containsString(tabify("Foo Fighters", "((http://www.last.fm/music/Foo+Fighters),(http://userserve-ak.last.fm/serve/252/59495563.jpg))")));
}

@Test
Expand All @@ -90,9 +114,14 @@ public void testBagWithSchema() throws Exception {
"REGISTER "+ Provisioner.ESHADOOP_TESTING_JAR + ";" +
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "', 'es.mapping.names=data:name');"
+ "A = LOAD 'pig/bagartists' USING EsStorage() AS (data: chararray);"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "B = ORDER A BY * DESC;"
+ "X = LIMIT B 3;"
+ "STORE X INTO '" + tmpPig() + "/testbagschema';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testbagschema");
assertThat(results, containsString("xotox"));
assertThat(results, containsString("the Chemodan"));
assertThat(results, containsString("www.DjFahad.com"));
}

@Test
Expand All @@ -102,8 +131,9 @@ public void testTimestamp() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "');"
+ "A = LOAD 'pig/timestamp' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testtimestamp';";
pig.executeScript(script);
System.out.println(getResults("" + tmpPig() + "/testtimestamp"));
}

@Test
Expand All @@ -113,8 +143,14 @@ public void testFieldAlias() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.mapping.names=nAme:name, timestamp:@timestamp, uRL:url, picturE:picture', 'es.query=" + query + "');"
+ "A = LOAD 'pig/fieldalias' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testfieldlalias';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testfieldlalias");

assertThat(results, containsString("Megadeth"));
assertThat(results, containsString("http://www.last.fm/music/Megadeth"));
assertThat(results, containsString("Blur"));
assertThat(results, containsString("http://www.last.fm/music/Soundgarden"));
}

@Test
Expand All @@ -124,8 +160,10 @@ public void testMissingIndex() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.index.read.missing.as.empty=true','es.query=" + query + "');"
+ "A = LOAD 'foo/bar' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE X INTO '" + tmpPig() + "/testmissingindex';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testmissingindex");
assertThat(results.length(), is(0));
}

@Test
Expand All @@ -135,8 +173,13 @@ public void testParentChild() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.index.read.missing.as.empty=true','es.query=" + query + "');"
+ "A = LOAD 'pig/child' USING EsStorage();"
+ "X = LIMIT A 3;"
+ "DUMP X;";
+ "STORE A INTO '" + tmpPig() + "/testparentchild';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testparentchild");

assertThat(results, containsString(tabify("181", "Paradise Lost", "((http://www.last.fm/music/Paradise+Lost),(http://userserve-ak.last.fm/serve/252/35325935.jpg))")));
assertThat(results, containsString(tabify("918", "Megadeth", "((http://www.last.fm/music/Megadeth),(http://userserve-ak.last.fm/serve/252/8129787.jpg))")));
assertThat(results, containsString(tabify("506", "Anathema", "((http://www.last.fm/music/Anathema),(http://userserve-ak.last.fm/serve/252/45858121.png))")));
}

@Test
Expand All @@ -146,11 +189,15 @@ public void testNestedObject() throws Exception {
"DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage('es.query=" + query + "');" // , 'es.mapping.names=links:links.url'
+ "A = LOAD 'pig/tupleartists' USING EsStorage() AS (name: chararray, links: tuple(chararray));"
+ "B = FOREACH A GENERATE name, links;"
//+ "ILLUSTRATE B;"
+ "DUMP B;";
+ "C = ORDER B BY name DESC;"
+ "D = LIMIT C 3;"
+ "STORE C INTO '" + tmpPig() + "/testnestedobject';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testnestedobject");

String tuple = "(Marilyn Manson,http://www.last.fm/music/Marilyn+Manson)";
assertThat(results, containsString(tabify("Paradise Lost", "(http://www.last.fm/music/Paradise+Lost,http://userserve-ak.last.fm/serve/252/35325935.jpg)")));
assertThat(results, containsString(tabify("Megadeth", "(http://www.last.fm/music/Megadeth,http://userserve-ak.last.fm/serve/252/8129787.jpg)")));
assertThat(results, containsString(tabify("Anathema", "(http://www.last.fm/music/Anathema,http://userserve-ak.last.fm/serve/252/45858121.png)")));
}

@Test
Expand All @@ -175,9 +222,15 @@ public void testNestedTuple() throws Exception {
+ "A = LOAD 'pig/nestedtuple' USING EsStorage() AS (my_array:tuple());"
//+ "B = FOREACH A GENERATE COUNT(my_array) AS count;"
//+ "ILLUSTRATE B;"
+ "DUMP A;"
//+ "DUMP B;"
;
+ "X = LIMIT A 3;"
+ "STORE A INTO '" + tmpPig() + "/testnestedtuple';";
pig.executeScript(script);
String results = getResults("" + tmpPig() + "/testnestedtuple");
assertThat(results, containsString("(1.a,1.b)"));
assertThat(results, containsString("(2.a,2.b)"));
}

private static String tmpPig() {
return "tmp-pig/search-" + testInstance;
}
}

0 comments on commit bbb8d73

Please sign in to comment.