Permalink
Browse files

Added testing and logging and an exception when there is a mismatch

  • Loading branch information...
1 parent 373941c commit b84310b299a9bec640b88fde92912df33ae138f8 Grant Ingersoll committed Jul 2, 2011
View
@@ -26,6 +26,12 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>org.easymock</groupId>
+ <artifactId>easymock</artifactId>
+ <version>3.0</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
@@ -40,5 +46,15 @@
<artifactId>hector-core</artifactId>
<version>0.7.0-28</version>
</dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.6.1</version>
+ </dependency>
</dependencies>
</project>
@@ -8,10 +8,13 @@
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.UDFContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
+import java.util.regex.Pattern;
/**
* EvalFunc to take the given set of values and convert them
@@ -25,23 +28,28 @@
* be persisted individually.
*/
public class ToCassandraBag extends EvalFunc<Tuple> {
-
- private static String UDFCONTEXT_SCHEMA_KEY = "cassandra.input_field_schema";
- private static String INPUT_DELIM = "[\\s,]+";
- private static char OUTPUT_DELIM = ',';
+ private transient static Logger log = LoggerFactory.getLogger(ToCassandraBag.class);
+ public static final String UDFCONTEXT_SCHEMA_KEY = "cassandra.input_field_schema";
+ private static final Pattern INPUT_DELIM = Pattern.compile("[\\s,]+");
+ private static final char OUTPUT_DELIM = ',';
public Tuple exec(Tuple input) throws IOException {
Tuple row = TupleFactory.getInstance().newTuple(2);
DataBag columns = BagFactory.getInstance().newDefaultBag();
UDFContext context = UDFContext.getUDFContext();
Properties property = context.getUDFProperties(ToCassandraBag.class);
String fieldString = property.getProperty(UDFCONTEXT_SCHEMA_KEY);
- String [] fieldnames = fieldString.split(INPUT_DELIM);
+ String [] fieldnames = INPUT_DELIM.split(fieldString);
+ if (log.isDebugEnabled()) {
+ log.debug("Tuple: " + input.toDelimitedString(",") + " Fields: " + fieldString);
+ }
// IT IS ALWAYS ASSUMED THAT THE OBJECT AT INDEX 0 IS THE ROW KEY
if(input.get(0)==null)
throw new IOException("The object at index 0 is the row key, its value can't be null!");
-
+ if (input.size() != fieldnames.length){
+ throw new IOException("There is a mismatch between the number of inputs (" + input.size() + " and fieldnames (" + fieldnames.length + ")");
+ }
for (int i=1; i<input.size(); i++) {
if (input.get(i) instanceof DataBag) {
columns.addAll((DataBag) input.get(i));
@@ -0,0 +1,40 @@
+package org.pygmailion.udf;
+
+
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.UDFContext;
+import org.eclipse.jdt.internal.compiler.codegen.AttributeNamesConstants;
+import org.junit.Test;
+import org.pygmalion.udf.ToCassandraBag;
+
+import java.util.Properties;
+
+import static junit.framework.Assert.assertNotNull;
+
+/**
+ * Smoke test for {@link ToCassandraBag}.
+ *
+ * Builds an input tuple with one string value per entry in {@code fields}, publishes the
+ * matching comma-separated field list under {@code ToCassandraBag.UDFCONTEXT_SCHEMA_KEY},
+ * and checks only that {@code exec} returns a non-null tuple. The contents of the returned
+ * tuple are not inspected.
+ *
+ * NOTE(review): this file declares package {@code org.pygmailion.udf} while the class under
+ * test is imported from {@code org.pygmalion.udf} - confirm whether the spelling difference
+ * is intentional.
+ **/
+public class ToCassandraBagTest {
+ private String [] fields = {"a", "b", "c", "d", "e", "f", "g", "h", "i"}; // field names; one input value is generated per name
+
+ @Test
+ public void test() throws Exception {
+ ToCassandraBag tcb = new ToCassandraBag();
+ UDFContext context = UDFContext.getUDFContext();
+ Properties properties = context.getUDFProperties(ToCassandraBag.class); // exec looks up its field list the same way; presumably this is the same Properties instance - TODO confirm
+ Tuple input = new DefaultTuple();
+ StringBuilder builder = new StringBuilder(); // accumulates the field list as "a,b,...,i"
+ for (int i = 0; i < fields.length; i++){
+ builder.append(fields[i]);
+ input.append("foo" + i); // value at index 0 ("foo0") is non-null, so exec's row-key null check passes
+ if (i < fields.length - 1){ // comma between names only, no trailing delimiter
+ builder.append(',');
+ }
+ }
+ properties.setProperty(ToCassandraBag.UDFCONTEXT_SCHEMA_KEY, builder.toString()); // must be set before exec, which reads this key
+ Tuple tuple = tcb.exec(input); // tuple size equals the number of field names, so the mismatch IOException is not expected here
+ assertNotNull("Tuple is null", tuple);
+ }
+}

0 comments on commit b84310b

Please sign in to comment.