Permalink
Browse files

Add scripts and update the examples to make them clearer.

  • Loading branch information...
1 parent 4bce149 commit 90216d673cee11fd7a3aa2a7e00dbe06fe856fd8 @jeromatron committed Jul 15, 2011
@@ -1,6 +1,8 @@
create keyspace pygmalion;
use pygmalion;
-create column family account with comparator = UTF8Type and default_validation_class = UTF8Type and
+create column family account with
+ comparator = UTF8Type and
+ default_validation_class = UTF8Type and
column_metadata=
[
{column_name: num_heads, validation_class: LongType},
@@ -0,0 +1,26 @@
+-- Schema and sample data for the pygmalion example scripts.
+create keyspace pygmalion;
+use pygmalion;
+
+-- account uses UTF8Type for row keys, column names and default values, and LongType for num_heads.
+create column family account with
+ comparator = UTF8Type and
+ default_validation_class = UTF8Type and
+ key_validation_class = UTF8Type and
+ column_metadata=
+ [
+ {column_name: num_heads, validation_class: LongType},
+ ];
+
+-- betelgeuse uses UTF8Type for column names and default values. No key_validation_class is declared for it.
+create column family betelgeuse with
+ comparator = UTF8Type and
+ default_validation_class = UTF8Type;
+
+-- Row 'hipcat'
+set account['hipcat']['first_name'] = 'Zaphod';
+set account['hipcat']['last_name'] = 'Beeblebrox';
+set account['hipcat']['birth_place'] = 'Betelgeuse Five';
+set account['hipcat']['num_heads'] = '2';
+
+-- Row 'hoopyfrood'
+set account['hoopyfrood']['first_name'] = 'Ford';
+set account['hoopyfrood']['last_name'] = 'Prefect';
+set account['hoopyfrood']['birth_place'] = 'Betelgeuse Five';
+set account['hoopyfrood']['num_heads'] = '1';
+
+-- Row 'earthman'
+set account['earthman']['first_name'] = 'Arthur';
+set account['earthman']['last_name'] = 'Dent';
+set account['earthman']['birth_place'] = 'Earth';
+set account['earthman']['num_heads'] = '1';
@@ -0,0 +1,17 @@
+-- Example: run DeleteColumns over the num_heads column of every row in pygmalion/account.
+register 'pygmalion.jar';
+define FromCassandraBag org.pygmalion.udf.FromCassandraBag();
+define DeleteColumns org.pygmalion.udf.DeleteColumns();
+-- Load each row as (key, bag of (name, value) columns) from the account column family.
+raw = LOAD 'cassandra://pygmalion/account' USING org.apache.cassandra.hadoop.pig.CassandraStorage() AS (key:chararray, columns:bag {column:tuple (name, value)});
+-- FromCassandraBag is given one column name per field declared in the AS clause below.
+account = FOREACH raw GENERATE key, FLATTEN(FromCassandraBag('first_name, last_name, birth_place, num_heads', columns)) AS (
+ first_name: chararray,
+ last_name: chararray,
+ birth_place: chararray,
+ num_heads: long
+ );
+-- NOTE(review): DeleteColumns(key, num_heads) presumably yields a per-row deletion of num_heads -- confirm against the UDF.
+account_cassandra = FOREACH account GENERATE FLATTEN(DeleteColumns(key, num_heads));
+
+STORE account_cassandra INTO 'cassandra://pygmalion/account' USING org.apache.cassandra.hadoop.pig.CassandraStorage();
@@ -1,7 +1,10 @@
register 'pygmalion.jar';
+define FromCassandraBag org.pygmalion.udf.FromCassandraBag();
+define ToCassandraBag org.pygmalion.udf.ToCassandraBag();
+
raw = LOAD 'cassandra://pygmalion/account' USING CassandraStorage() AS (key:chararray, columns:bag {column:tuple (name, value)});
-rows = FOREACH raw GENERATE key, FLATTEN(org.pygmalion.udf.FromCassandraBag('first_name, last_name, birth_place', columns)) AS (
+rows = FOREACH raw GENERATE key, FLATTEN(FromCassandraBag('first_name, last_name, birth_place', columns)) AS (
first_name:chararray,
last_name:chararray,
birth_place:chararray
@@ -10,10 +13,6 @@ rows = FOREACH raw GENERATE key, FLATTEN(org.pygmalion.udf.FromCassandraBag('fir
betelgeuse_born = FILTER rows BY (birth_place matches '.*[Bb]etelgeuse.*');
betelgeuse_cassandra = FOREACH betelgeuse_born GENERATE
- FLATTEN(org.pygmalion.udf.ToCassandraBag(first_name, last_name, birth_place)) AS (
- first_name:chararray,
- last_name:chararray,
- birth_place:chararray
- );
+ FLATTEN(ToCassandraBag(first_name, last_name, birth_place));
STORE betelgeuse_cassandra INTO 'cassandra://pygmalion/betelgeuse' USING CassandraStorage();
View
@@ -1,4 +1,6 @@
+%default CF account
+
-- This script simply gets a row count of the given column family
-rows = LOAD 'cassandra://pygmalion/account' USING CassandraStorage() AS (key, columns: bag {T: tuple(name, value)});
+rows = LOAD 'cassandra://pygmalion/$CF' USING CassandraStorage() AS (key, columns: bag {T: tuple(name, value)});
counted = foreach (group rows all) generate COUNT($1);
dump counted;

0 comments on commit 90216d6

Please sign in to comment.