Permalink
Browse files

HIVE-4007 : Create abstract classes for serializer and deserializer (…

…Namit Jain via Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1461235 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 84e2bc3 commit f86f89aed2c66cc7fe0e5af3b0d871de847b10f9 @ashutoshc ashutoshc committed Mar 26, 2013
Showing with 327 additions and 66 deletions.
  1. +2 −2 contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java
  2. +3 −3 contrib/src/java/org/apache/hadoop/hive/contrib/serde2/TypedBytesSerDe.java
  3. +6 −2 contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java
  4. +5 −3 hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
  5. +7 −1 ql/src/test/org/apache/hadoop/hive/serde2/TestSerDe.java
  6. +68 −0 serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java
  7. +84 −0 serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
  8. +65 −0 serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java
  9. +4 −2 serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
  10. +8 −2 serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
  11. +1 −1 serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java
  12. +2 −1 serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java
  13. +6 −4 serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java
  14. +3 −1 serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
  15. +7 −1 serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
  16. +19 −10 serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
  17. +22 −22 serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
  18. +2 −2 serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java
  19. +2 −3 serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java
  20. +3 −2 serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
  21. +2 −2 serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
  22. +6 −2 serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
@@ -29,7 +29,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -70,7 +70,7 @@
* writableStringObjectInspector. We should switch to that when we have a UTF-8
* based Regex library.
*/
-public class RegexSerDe implements SerDe {
+public class RegexSerDe extends AbstractSerDe {
public static final Log LOG = LogFactory.getLog(RegexSerDe.class.getName());
@@ -32,19 +32,19 @@
import org.apache.hadoop.hive.ql.io.NonSyncDataInputBuffer;
import org.apache.hadoop.hive.ql.io.NonSyncDataOutputBuffer;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
@@ -73,7 +73,7 @@
* this, which is apparently 25% faster than the python version is available at
* http://github.com/klbostee/ctypedbytes/tree/master
*/
-public class TypedBytesSerDe implements SerDe {
+public class TypedBytesSerDe extends AbstractSerDe {
public static final Log LOG = LogFactory.getLog(TypedBytesSerDe.class
.getName());
@@ -27,7 +27,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.AbstractDeserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -42,7 +42,7 @@
* S3LogDeserializer.
*
*/
-public class S3LogDeserializer implements Deserializer {
+public class S3LogDeserializer extends AbstractDeserializer {
public static final Log LOG = LogFactory.getLog(S3LogDeserializer.class
.getName());
@@ -73,6 +73,7 @@ public S3LogDeserializer() throws SerDeException {
S3LogStruct deserializeCache = new S3LogStruct();
+ @Override
public void initialize(Configuration job, Properties tbl)
throws SerDeException {
@@ -132,6 +133,7 @@ public static Object deserialize(S3LogStruct c, String row) throws Exception {
return (c);
}
+ @Override
public Object deserialize(Writable field) throws SerDeException {
String row = null;
if (field instanceof BytesWritable) {
@@ -155,6 +157,7 @@ public Object deserialize(Writable field) throws SerDeException {
}
}
+ @Override
public ObjectInspector getObjectInspector() throws SerDeException {
return cachedObjectInspector;
}
@@ -202,6 +205,7 @@ public static void main(String[] args) {
}
+ @Override
public SerDeStats getSerDeStats() {
// no support for statistics
return null;
@@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -58,7 +59,7 @@
* HBaseSerDe can be used to serialize object into an HBase table and
* deserialize objects from an HBase table.
*/
-public class HBaseSerDe implements SerDe {
+public class HBaseSerDe extends AbstractSerDe {
public static final String HBASE_COLUMNS_MAPPING = "hbase.columns.mapping";
public static final String HBASE_TABLE_NAME = "hbase.table.name";
@@ -546,10 +547,11 @@ public Writable serialize(Object obj, ObjectInspector objInspector)
throw new SerDeException("HBase row key cannot be NULL");
}
- if(putTimestamp >= 0)
+ if(putTimestamp >= 0) {
put = new Put(key,putTimestamp);
- else
+ } else {
put = new Put(key);
+ }
// Serialize each field
for (int i = 0; i < fields.size(); i++) {
@@ -41,7 +41,7 @@
* TestSerDe.
*
*/
-public class TestSerDe implements SerDe {
+public class TestSerDe extends AbstractSerDe {
public static final Log LOG = LogFactory.getLog(TestSerDe.class.getName());
@@ -83,6 +83,7 @@ public TestSerDe() throws SerDeException {
separator = DefaultSeparator;
}
+ @Override
public void initialize(Configuration job, Properties tbl) throws SerDeException {
separator = DefaultSeparator;
String altSep = tbl.getProperty("testserde.default.serialization.format");
@@ -133,6 +134,7 @@ public static Object deserialize(ColumnSet c, String row, String sep,
ColumnSet deserializeCache = new ColumnSet();
+ @Override
public Object deserialize(Writable field) throws SerDeException {
String row = null;
if (field instanceof BytesWritable) {
@@ -159,16 +161,19 @@ public Object deserialize(Writable field) throws SerDeException {
}
}
+ @Override
public ObjectInspector getObjectInspector() throws SerDeException {
return cachedObjectInspector;
}
+ @Override
public Class<? extends Writable> getSerializedClass() {
return Text.class;
}
Text serializeCache = new Text();
+ @Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
if (objInspector.getCategory() != Category.STRUCT) {
@@ -198,6 +203,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe
return serializeCache;
}
+ @Override
public SerDeStats getSerDeStats() {
// no support for statistics
return null;
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Abstract base class for implementing Deserializer. The abstract class has been created so that
+ * new methods can be added to the underlying interface, Deserializer, and only the
+ * implementations that need those methods have to override them.
+ */
+public abstract class AbstractDeserializer implements Deserializer {
+
+ /**
+ * Initialize the HiveDeserializer.
+ *
+ * @param conf
+ * System properties
+ * @param tbl
+ * table properties
+ * @throws SerDeException
+ * declared for implementations that cannot be initialized; the exact
+ * conditions are implementation-specific
+ */
+ public abstract void initialize(Configuration conf, Properties tbl) throws SerDeException;
+
+ /**
+ * Deserialize an object out of a Writable blob. In most cases, the return
+ * value of this function will be constant since the function will reuse the
+ * returned object. If the client wants to keep a copy of the object, the
+ * client needs to clone the returned value by calling
+ * ObjectInspectorUtils.getStandardObject().
+ *
+ * @param blob
+ * The Writable object containing a serialized object
+ * @return A Java object representing the contents in the blob.
+ * @throws SerDeException
+ * declared for implementations that cannot deserialize the blob; the
+ * exact conditions are implementation-specific
+ */
+ public abstract Object deserialize(Writable blob) throws SerDeException;
+
+ /**
+ * Get the object inspector that can be used to navigate through the internal
+ * structure of the Object returned from deserialize(...).
+ *
+ * @return the ObjectInspector matching the objects produced by deserialize(...)
+ * @throws SerDeException
+ * declared by the signature; conditions are implementation-specific
+ */
+ public abstract ObjectInspector getObjectInspector() throws SerDeException;
+
+ /**
+ * Returns statistics collected while deserializing.
+ *
+ * @return the collected statistics; implementations that do not support
+ * statistics may return null
+ */
+ public abstract SerDeStats getSerDeStats();
+}
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Abstract base class for implementing SerDe. The abstract class has been created so that
+ * new methods can be added to the underlying interface, SerDe, and only the implementations
+ * that need those methods have to override them.
+ */
+public abstract class AbstractSerDe implements SerDe {
+
+ /**
+ * Initialize the SerDe.
+ *
+ * @param conf
+ * System properties
+ * @param tbl
+ * table properties
+ * @throws SerDeException
+ * declared for implementations that cannot be initialized; the exact
+ * conditions are implementation-specific
+ */
+ public abstract void initialize(Configuration conf, Properties tbl) throws SerDeException;
+
+ /**
+ * Returns the Writable class that would be returned by the serialize method.
+ * This is used to initialize SequenceFile header.
+ *
+ * @return the Writable subclass produced by serialize(...)
+ */
+ public abstract Class<? extends Writable> getSerializedClass();
+
+ /**
+ * Serialize an object by navigating inside the Object with the
+ * ObjectInspector. In most cases, the return value of this function will be
+ * constant since the function will reuse the Writable object. If the client
+ * wants to keep a copy of the Writable, the client needs to clone the
+ * returned value.
+ *
+ * @param obj
+ * the object to serialize
+ * @param objInspector
+ * the ObjectInspector used to navigate inside obj
+ * @return the Writable holding the serialized form of obj
+ * @throws SerDeException
+ * declared for implementations that cannot serialize obj; the exact
+ * conditions are implementation-specific
+ */
+ public abstract Writable serialize(Object obj, ObjectInspector objInspector)
+ throws SerDeException;
+
+ /**
+ * Returns statistics collected when serializing
+ *
+ * @return the collected statistics; implementations that do not support
+ * statistics may return null
+ */
+ public abstract SerDeStats getSerDeStats();
+
+ /**
+ * Deserialize an object out of a Writable blob. In most cases, the return
+ * value of this function will be constant since the function will reuse the
+ * returned object. If the client wants to keep a copy of the object, the
+ * client needs to clone the returned value by calling
+ * ObjectInspectorUtils.getStandardObject().
+ *
+ * @param blob
+ * The Writable object containing a serialized object
+ * @return A Java object representing the contents in the blob.
+ * @throws SerDeException
+ * declared for implementations that cannot deserialize the blob; the
+ * exact conditions are implementation-specific
+ */
+ public abstract Object deserialize(Writable blob) throws SerDeException;
+
+ /**
+ * Get the object inspector that can be used to navigate through the internal
+ * structure of the Object returned from deserialize(...).
+ *
+ * @return the ObjectInspector matching the objects produced by deserialize(...)
+ * @throws SerDeException
+ * declared by the signature; conditions are implementation-specific
+ */
+ public abstract ObjectInspector getObjectInspector() throws SerDeException;
+}
Oops, something went wrong.

0 comments on commit f86f89a

Please sign in to comment.