Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

added test script for dumping data from elasticsearch

  • Loading branch information...
commit ccaca7d4340ee03d93e7a24edb5603a086a833e9 1 parent 62380cd
@thedatachef thedatachef authored
View
22 test/test_dump.pig
@@ -0,0 +1,22 @@
+--
+-- This tests loading data from elasticsearch
+--
+register '/usr/local/share/elasticsearch/lib/elasticsearch-0.16.0.jar';
+register '/usr/local/share/elasticsearch/lib/jline-0.9.94.jar';
+register '/usr/local/share/elasticsearch/lib/jna-3.2.7.jar';
+register '/usr/local/share/elasticsearch/lib/log4j-1.2.15.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-analyzers-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-core-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-highlighter-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-memory-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-queries-3.1.0.jar';
+register target/wonderdog-1.0-SNAPSHOT.jar;
+
+%default INDEX 'foo_test'
+%default OBJ 'foo'
+
+--
+-- Will load the data as (doc_id, contents) tuples where the contents is the original json source from elasticsearch. The index and object type come from the INDEX and OBJ parameters (override with -param).
+--
+foo = LOAD 'es://$INDEX/$OBJ?q=character:c' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage() AS (doc_id:chararray, contents:chararray);
+DUMP foo;
View
26 test/test_json_loader.pig
@@ -1,20 +1,24 @@
--
-- This tests the json indexer. Run in local mode with 'pig -x local test/test_json_loader.pig'
--
+register '/usr/local/share/elasticsearch/lib/elasticsearch-0.16.0.jar';
+register '/usr/local/share/elasticsearch/lib/jline-0.9.94.jar';
+register '/usr/local/share/elasticsearch/lib/jna-3.2.7.jar';
+register '/usr/local/share/elasticsearch/lib/log4j-1.2.15.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-analyzers-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-core-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-highlighter-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-memory-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-queries-3.1.0.jar';
register target/wonderdog-1.0-SNAPSHOT.jar;
-register /usr/local/share/elasticsearch/lib/elasticsearch-0.14.2.jar;
-register /usr/local/share/elasticsearch/lib/jline-0.9.94.jar;
-register /usr/local/share/elasticsearch/lib/jna-3.2.7.jar;
-register /usr/local/share/elasticsearch/lib/log4j-1.2.15.jar;
-register /usr/local/share/elasticsearch/lib/lucene-analyzers-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-core-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-fast-vector-highlighter-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-highlighter-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-memory-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-queries-3.0.3.jar;
%default INDEX 'foo_test'
%default OBJ 'foo'
foo = LOAD 'test/foo.json' AS (data:chararray);
-STORE foo INTO 'es://$INDEX/$OBJ' USING com.infochimps.elasticsearch.pig.ElasticSearchJsonIndex('-1', '10');
+
+--
+-- Query parameters let the elasticsearch output format know that we're storing json data and
+-- want to use a bulk request size of 1 record.
+--
+STORE foo INTO 'es://$INDEX/$OBJ?json=true&size=1' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();
View
22 test/test_tsv_loader.pig
@@ -1,20 +1,20 @@
--
-- This tests the tsv indexer. Run in local mode with 'pig -x local test/test_tsv_loader.pig'
--
+register '/usr/local/share/elasticsearch/lib/elasticsearch-0.16.0.jar';
+register '/usr/local/share/elasticsearch/lib/jline-0.9.94.jar';
+register '/usr/local/share/elasticsearch/lib/jna-3.2.7.jar';
+register '/usr/local/share/elasticsearch/lib/log4j-1.2.15.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-analyzers-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-core-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-highlighter-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-memory-3.1.0.jar';
+register '/usr/local/share/elasticsearch/lib/lucene-queries-3.1.0.jar';
register target/wonderdog-1.0-SNAPSHOT.jar;
-register /usr/local/share/elasticsearch/lib/elasticsearch-0.14.2.jar;
-register /usr/local/share/elasticsearch/lib/jline-0.9.94.jar;
-register /usr/local/share/elasticsearch/lib/jna-3.2.7.jar;
-register /usr/local/share/elasticsearch/lib/log4j-1.2.15.jar;
-register /usr/local/share/elasticsearch/lib/lucene-analyzers-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-core-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-fast-vector-highlighter-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-highlighter-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-memory-3.0.3.jar;
-register /usr/local/share/elasticsearch/lib/lucene-queries-3.0.3.jar;
%default INDEX 'foo_test'
%default OBJ 'foo'
foo = LOAD 'test/foo.tsv' AS (character:chararray, value:int);
-STORE foo INTO 'es://$INDEX/$OBJ' USING com.infochimps.elasticsearch.pig.ElasticSearchIndex('-1', '10');
+
+STORE foo INTO 'es://$INDEX/$OBJ?json=false&size=1' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();
Please sign in to comment.
Something went wrong with that request. Please try again.