Permalink
Browse files

Yay! First code drop.

  • Loading branch information...
1 parent 65abae9 commit d43c2feb213a94b49e32a389eb802619ea0fa79c @edwardcapriolo committed May 1, 2012
View
@@ -0,0 +1,2 @@
+DualInputFormat
+Copyright 2010, 2011 m6d Media6degrees
View
110 pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.jointhegrid</groupId>
+  <artifactId>DualInputFormat</artifactId>
+  <name>DualInputFormat</name>
+  <version>1.0.0-SNAPSHOT</version>
+  <description>To be used with hive for queries designed to only return a single line.</description>
+  <packaging>jar</packaging>
+
+  <properties></properties>
+  <dependencies>
+    <!-- NOTE(review): hive_test is a SNAPSHOT, so it presumably must be built
+         and installed locally before this project resolves - confirm. -->
+    <dependency>
+      <groupId>com.jointhegrid</groupId>
+      <artifactId>hive_test</artifactId>
+      <version>3.0.1-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>0.20.2</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-test</artifactId>
+      <version>0.20.2</version>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.7</version>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+  <build>
+    <!-- NOTE(review): every plugin below sits only under <pluginManagement>,
+         which supplies shared configuration but does not bind plugins to this
+         module's build. They presumably must also be listed under
+         <build><plugins> (or invoked explicitly on the command line) to
+         actually run - confirm this is intended. -->
+    <pluginManagement>
+      <plugins>
+
+        <!-- Downloads the hadoop-0.20.2 tarball before integration tests. -->
+        <plugin>
+          <configuration>
+            <serverId>apache-main</serverId>
+            <url>http://www.apache.org/dist/hadoop/common/hadoop-0.20.2</url>
+            <fromFile>hadoop-0.20.2.tar.gz</fromFile>
+            <toDir>${project.build.directory}/hadoop</toDir>
+          </configuration>
+
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>wagon-maven-plugin</artifactId>
+          <version>1.0-beta-3</version>
+          <executions>
+            <execution>
+              <id>download-hadoop</id>
+              <phase>pre-integration-test</phase>
+              <goals>
+                <goal>download-single</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+
+        <!-- NOTE(review): no <executions> are bound here, so this tar
+             extraction only runs when invoked explicitly (e.g. exec:exec) -
+             confirm whether it should be bound to a lifecycle phase. -->
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>exec-maven-plugin</artifactId>
+          <version>1.2.1</version>
+          <configuration>
+            <executable>tar</executable>
+            <arguments>
+              <argument>-xf</argument>
+              <argument>${project.build.directory}/hadoop/hadoop-0.20.2.tar.gz</argument>
+              <argument>-C</argument>
+              <argument>${project.build.directory}</argument>
+            </arguments>
+          </configuration>
+        </plugin>
+
+
+
+        <!-- Eclipse project generation settings. -->
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-eclipse-plugin</artifactId>
+          <version>2.5.1</version>
+          <configuration>
+            <projectNameTemplate>[artifactId]</projectNameTemplate>
+            <wtpmanifest>true</wtpmanifest>
+            <wtpapplicationxml>true</wtpapplicationxml>
+            <wtpversion>1.5</wtpversion>
+            <additionalBuildcommands>
+              <buildcommand>org.eclipse.jdt.core.javabuilder</buildcommand>
+              <buildcommand>org.maven.ide.eclipse.maven2Builder</buildcommand>
+            </additionalBuildcommands>
+            <additionalProjectnatures>
+              <projectnature>org.eclipse.jdt.core.javanature</projectnature>
+              <projectnature>org.maven.ide.eclipse.maven2Nature</projectnature>
+            </additionalProjectnatures>
+          </configuration>
+        </plugin>
+        <!-- Compile for Java 6. -->
+        <plugin>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <configuration>
+            <source>1.6</source>
+            <target>1.6</target>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>
@@ -0,0 +1,41 @@
+/*
+Copyright 2011 m6d.com
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package com.m6d.dualinputformat;
+
+import java.io.IOException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+public class DualInputFormat implements InputFormat{
+
+ @Override
+ public InputSplit[] getSplits(JobConf jc, int i) throws IOException {
+ InputSplit [] splits = new DualInputSplit[1];
+ splits[0]= new DualInputSplit();
+ return splits;
+ }
+
+ @Override
+ public RecordReader<Text,Text> getRecordReader(InputSplit split, JobConf jc,
+ Reporter rprtr) throws IOException {
+ return new DualRecordReader(jc, split);
+ }
+
+}
@@ -0,0 +1,43 @@
+/*
+Copyright 2011 m6d.com
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package com.m6d.dualinputformat;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.mapred.InputSplit;
+
+public class DualInputSplit implements InputSplit {
+
+ @Override
+ public long getLength() throws IOException {
+ return 1;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return new String [] { "localhost" };
+ }
+
+ @Override
+ public void write(DataOutput d) throws IOException {
+ }
+
+ @Override
+ public void readFields(DataInput di) throws IOException {
+ }
+
+}
@@ -0,0 +1,58 @@
+package com.m6d.dualinputformat;
+
+import java.io.IOException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+public class DualRecordReader implements RecordReader<Text,Text>{
+
+ boolean hasNext=true;
+
+ public DualRecordReader(JobConf jc, InputSplit s) {
+
+ }
+
+ public DualRecordReader(){
+
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ if (hasNext)
+ return 0.0f;
+ else
+ return 1.0f;
+ }
+
+ @Override
+ public Text createKey() {
+ return new Text("");
+ }
+
+ @Override
+ public Text createValue() {
+ return new Text("");
+ }
+
+ @Override
+ public boolean next(Text k, Text v) throws IOException {
+ if (hasNext){
+ hasNext=false;
+ return true;
+ } else {
+ return hasNext;
+ }
+ }
+
+}
@@ -0,0 +1,53 @@
+package com.m6d.dualinputformat;
+
+import com.jointhegrid.hive_test.HiveTestService;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.junit.Ignore;
+
+public class DualInputFormatTest extends HiveTestService {
+
+ public DualInputFormatTest() throws IOException {
+ super();
+ }
+
+ //add test to the name if you want to run this.
+ public void Execute() throws Exception {
+ Path p = new Path(this.ROOT_DIR, "afile");
+
+ FSDataOutputStream o = this.getFileSystem().create(p);
+ BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(o));
+ bw.write("1\n");
+ bw.write("2\n");
+ bw.close();
+
+ String jarFile = DualInputFormat.class.getProtectionDomain()
+ .getCodeSource().getLocation().getFile();
+
+ //these do not work with hive thrift we have to get the
+ ///files into auxlib
+ //DistributedCache.addCacheFile( new URI(jarFile), new Configuration());
+ //client.execute( "set hive.aux.jars.path="+new URI(jarFile).toASCIIString());
+ //do not know how to work around this for now build and put in hadoop_home/lib... yuk
+
+ client.execute ( "add jar "+jarFile);
+ client.execute("create table dual (fake string) "+
+ "STORED AS INPUTFORMAT 'com.m6d.dualinputformat.DualInputFormat'"+
+ "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'");
+ client.execute("load data local inpath '" + p.toString() + "' into table dual");
+ client.execute("select count(1) as cnt from dual");
+ String row = client.fetchOne();
+ assertEquals( "1", row);
+
+ client.execute("select * from dual");
+ row = client.fetchOne();
+ assertEquals( "", row);
+
+ client.execute("drop table dual");
+ }
+
+
+}

0 comments on commit d43c2fe

Please sign in to comment.