Permalink
Browse files

Initial drop.

  • Loading branch information...
1 parent 7492a76 commit a0ae9a9c3ddaf04779a5cd17969c5a65419b5085 @edwardcapriolo committed Jul 14, 2012
View
@@ -0,0 +1,2 @@
+hive-protobuf
+Copyright 2012 m6d Media6degrees
View
119 pom.xml
@@ -0,0 +1,119 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.m6d</groupId>
  <artifactId>hive-protobuf</artifactId>
  <name>hive-protobuf</name>
  <version>1.0.1-SNAPSHOT</version>
  <description>Protobuf support for hive</description>
  <packaging>jar</packaging>

  <dependencies>

    <dependency>
      <groupId>com.google.protobuf</groupId>
      <artifactId>protobuf-java</artifactId>
      <version>2.4.1</version>
    </dependency>

    <dependency>
      <groupId>com.jointhegrid</groupId>
      <artifactId>hive_test</artifactId>
      <version>4.0.0-SNAPSHOT</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>0.20.2</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-test</artifactId>
      <version>0.20.2</version>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.7</version>
      <scope>test</scope>
    </dependency>

  </dependencies>

  <build>
    <!-- NOTE(review): these plugins were previously declared only under
         <pluginManagement>, which supplies defaults but does NOT bind plugins
         to the build. The download-hadoop execution (pre-integration-test)
         never ran. Declaring them under <plugins> activates them. -->
    <plugins>

      <!-- Fetches the hadoop tarball used by the integration tests. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>wagon-maven-plugin</artifactId>
        <version>1.0-beta-3</version>
        <configuration>
          <serverId>apache-main</serverId>
          <url>http://www.apache.org/dist/hadoop/common/hadoop-0.20.2</url>
          <fromFile>hadoop-0.20.2.tar.gz</fromFile>
          <toDir>${project.build.directory}/hadoop</toDir>
        </configuration>
        <executions>
          <execution>
            <id>download-hadoop</id>
            <phase>pre-integration-test</phase>
            <goals>
              <goal>download-single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- Unpacks the downloaded tarball; invoked on demand (exec:exec),
           no lifecycle execution is bound. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.2.1</version>
        <configuration>
          <executable>tar</executable>
          <arguments>
            <argument>-xf</argument>
            <argument>${project.build.directory}/hadoop/hadoop-0.20.2.tar.gz</argument>
            <argument>-C</argument>
            <argument>${project.build.directory}</argument>
          </arguments>
        </configuration>
      </plugin>

      <!-- Eclipse project generation (mvn eclipse:eclipse). -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <version>2.5.1</version>
        <configuration>
          <projectNameTemplate>[artifactId]</projectNameTemplate>
          <wtpmanifest>true</wtpmanifest>
          <wtpapplicationxml>true</wtpapplicationxml>
          <wtpversion>1.5</wtpversion>
          <additionalBuildcommands>
            <buildcommand>org.eclipse.jdt.core.javabuilder</buildcommand>
            <buildcommand>org.maven.ide.eclipse.maven2Builder</buildcommand>
          </additionalBuildcommands>
          <additionalProjectnatures>
            <projectnature>org.eclipse.jdt.core.javanature</projectnature>
            <projectnature>org.maven.ide.eclipse.maven2Nature</projectnature>
          </additionalProjectnatures>
        </configuration>
      </plugin>

      <!-- Compile for Java 6. -->
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>

    </plugins>
  </build>
</project>
@@ -0,0 +1,59 @@
+/*
+Copyright 2012 m6d.com
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package com.m6d.hive.protobuf;
+
+/*
+ * Currently hive-protobuf does reflection to inspect and match nested
+ * protobufs. By saving a map of class/method name-> method we do not
+ * have to inspect each row.
+ */
/**
 * A (class, method-name) key used to cache reflection lookups.
 *
 * <p>hive-protobuf uses reflection to inspect and match nested protobufs;
 * by keying a {@code Map<ClassMethod, Method>} on this pair, the expensive
 * reflective lookup does not have to be repeated for every row.
 *
 * <p>Instances are mutable value objects; do not modify a key after it has
 * been inserted into a hash-based map.
 */
public class ClassMethod {

  // Fields are public for direct access; part of the existing API.
  public Class<?> clazz;
  public String method;

  /**
   * @param c the class whose method was looked up (may be null)
   * @param m the method name (may be null)
   */
  public ClassMethod(Class<?> c, String m) {
    this.clazz = c;
    this.method = m;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true; // fast path for self-comparison
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    final ClassMethod other = (ClassMethod) obj;
    // Null-safe comparison of both components.
    if (this.clazz != other.clazz
        && (this.clazz == null || !this.clazz.equals(other.clazz))) {
      return false;
    }
    if ((this.method == null) ? (other.method != null)
        : !this.method.equals(other.method)) {
      return false;
    }
    return true;
  }

  @Override
  public int hashCode() {
    // Same formula as before so hash values stay stable.
    int hash = 5;
    hash = 67 * hash + (this.clazz != null ? this.clazz.hashCode() : 0);
    hash = 67 * hash + (this.method != null ? this.method.hashCode() : 0);
    return hash;
  }

}
@@ -0,0 +1,126 @@
+/*
+Copyright 2012 m6d.com
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package com.m6d.hive.protobuf;
+
+import java.io.DataInput;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+
+/* Hive ignores keys in input formats which is PITA.
+ * This input format converts:
+ * key,value -> ull,pair(key,value)
+ */
+public class KVAsVSeqFileBinaryInputFormat
+ extends SequenceFileInputFormat<NullWritable,Pair> {
+
+ public KVAsVSeqFileBinaryInputFormat() {
+ super();
+ }
+
+ public RecordReader<NullWritable,Pair> getRecordReader
+ (InputSplit split, JobConf jobConf, Reporter reporter)
+ throws IOException {
+ return new KVAsVSeqFileBinaryRecordReader(jobConf, (FileSplit) split);
+ }
+
+ public static class KVAsVSeqFileBinaryRecordReader
+ implements RecordReader<NullWritable,Pair> {
+
+ private SequenceFile.Reader in;
+ private long start;
+ private long end;
+ private boolean done = false;
+ private DataOutputBuffer buffer = new DataOutputBuffer();
+ private SequenceFile.ValueBytes vbytes;
+
+ BytesWritable key = new BytesWritable();
+ BytesWritable value = new BytesWritable();
+
+ public KVAsVSeqFileBinaryRecordReader(Configuration conf, FileSplit split)
+ throws IOException {
+ Path path = split.getPath();
+ FileSystem fs = path.getFileSystem(conf);
+ this.in = new SequenceFile.Reader(fs, path, conf);
+ this.end = split.getStart() + split.getLength();
+ if (split.getStart() > in.getPosition()){
+ in.sync(split.getStart());
+ }
+ this.start = in.getPosition();
+ vbytes = in.createValueBytes();
+ done = start >= end;
+ }
+
+ @Override
+ public boolean next(NullWritable k, Pair v) throws IOException {
+ done=in.next(key, value);
+ v.setKey(key);
+ v.setValue(value);
+ return done;
+ }
+
+ @Override
+ public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ public String getKeyClassName(){
+ return NullWritable.class.getName();
+ }
+
+ public String getValueClassName(){
+ return Pair.class.getName();
+ }
+
+ @Override
+ public Pair createValue() {
+ return new Pair();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return in.getPosition();
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ if (end == start){
+ return 0.0f;
+ } else {
+ return Math.min(1.0f, (float) ((in.getPosition() - start) /
+ (double) (end-start)));
+ }
+ }
+
+ }
+
+}
@@ -0,0 +1,52 @@
+/*
+Copyright 2012 m6d.com
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package com.m6d.hive.protobuf;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.io.Writable;
+
+
+public class Pair implements Writable {
+
+ private Writable key;
+ private Writable value;
+
+
+ public Pair(){
+
+ }
+
+ @Override
+ public void write(DataOutput d) throws IOException {
+ key.write(d);
+ value.write(d);
+ }
+
+ @Override
+ public void readFields(DataInput di) throws IOException {
+ key.readFields(di);
+ value.readFields(di);
+ }
+
+ public Writable getKey() {
+ return key;
+ }
+
+ public void setKey(Writable key) {
+ this.key = key;
+ }
+
+ public Writable getValue() {
+ return value;
+ }
+
+ public void setValue(Writable value) {
+ this.value = value;
+ }
+
+}
Oops, something went wrong.

0 comments on commit a0ae9a9

Please sign in to comment.