Skip to content

Commit

Permalink
HIVE-1555: JDBC Storage Handler (Gunther Hagleitner, reviewed by Jason Dere)
Browse files Browse the repository at this point in the history
  • Loading branch information
hagleitn committed Mar 1, 2017
1 parent a9de1cd commit 12b27a3
Show file tree
Hide file tree
Showing 35 changed files with 2,898 additions and 0 deletions.
7 changes: 7 additions & 0 deletions itests/qtest/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc-handler</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>


<!-- test inter-project -->
<dependency>
Expand Down
1 change: 1 addition & 0 deletions itests/src/test/resources/testconfiguration.properties
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ minillaplocal.query.files=acid_globallimit.q,\
input16_cc.q,\
insert_dir_distcp.q,\
insert_into_with_schema.q,\
jdbc_handler.q,\
join1.q,\
join_acid_non_acid.q,\
join_filters.q,\
Expand Down
127 changes: 127 additions & 0 deletions jdbc-handler/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hive</groupId>
<artifactId>hive</artifactId>
<version>2.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>hive-jdbc-handler</artifactId>
<packaging>jar</packaging>
<name>Hive JDBC Handler</name>

<properties>
<hive.path.to.root>..</hive.path.to.root>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${hadoop.version}</version>
<optional>true</optional>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<!-- groupId was previously misspelled "commmons-logging", which made
this exclusion a no-op and leaked commons-logging onto the classpath -->
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>${mockito-all.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${project.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>

<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>${h2database.version}</version>
<scope>test</scope>
</dependency>

</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hive.storage.jdbc;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hive.storage.jdbc.dao.DatabaseAccessor;
import org.apache.hive.storage.jdbc.dao.DatabaseAccessorFactory;

import java.io.IOException;

public class JdbcInputFormat extends HiveInputFormat<LongWritable, MapWritable> {

  private static final Logger LOGGER = LoggerFactory.getLogger(JdbcInputFormat.class);

  // Lazily created on first getSplits() call; tests may inject one via setDbAccessor().
  private DatabaseAccessor dbAccessor = null;


  /**
   * {@inheritDoc}
   */
  @Override
  public RecordReader<LongWritable, MapWritable>
    getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {

    if (split instanceof JdbcInputSplit) {
      return new JdbcRecordReader(job, (JdbcInputSplit) split);
    }
    throw new RuntimeException("Incompatible split type " + split.getClass().getName() + ".");
  }


  /**
   * {@inheritDoc}
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    try {
      // A non-positive request still yields a single split.
      final int splitCount = (numSplits <= 0) ? 1 : numSplits;
      LOGGER.debug("Creating {} input splits", splitCount);

      if (dbAccessor == null) {
        dbAccessor = DatabaseAccessorFactory.getAccessor(job);
      }

      final int totalRecords = dbAccessor.getTotalNumberOfRecords(job);
      LOGGER.debug("Num records = {}", totalRecords);

      // Distribute records as evenly as possible: the first (totalRecords % splitCount)
      // splits each carry one extra record.
      final int baseSize = totalRecords / splitCount;
      final int remainder = totalRecords % splitCount;

      final Path[] tablePaths = FileInputFormat.getInputPaths(job);
      final InputSplit[] result = new InputSplit[splitCount];

      int currentOffset = 0;
      for (int i = 0; i < splitCount; i++) {
        final int recordsInSplit = (i < remainder) ? baseSize + 1 : baseSize;
        result[i] = new JdbcInputSplit(recordsInSplit, currentOffset, tablePaths[0]);
        currentOffset += recordsInSplit;
      }

      return result;
    }
    catch (Exception e) {
      LOGGER.error("Error while splitting input data.", e);
      throw new IOException(e);
    }
  }


  /**
   * For testing purposes only
   *
   * @param dbAccessor
   *          DatabaseAccessor object
   */
  public void setDbAccessor(DatabaseAccessor dbAccessor) {
    this.dbAccessor = dbAccessor;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hive.storage.jdbc;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Input split for the JDBC storage handler. A split is a window into the
 * query result, described by a record count ({@code limit}) and the index
 * of the first record ({@code offset}); there is no data locality, so
 * locations are always empty.
 */
public class JdbcInputSplit extends FileSplit implements InputSplit {

  private static final String[] EMPTY_ARRAY = new String[] {};

  // Number of records covered by this split.
  private int limit = 0;
  // Index of the first record covered by this split.
  private int offset = 0;


  /** No-arg constructor required for Writable deserialization. */
  public JdbcInputSplit() {
    super((Path) null, 0, 0, EMPTY_ARRAY);
  }


  /**
   * @param limit number of records in this split
   * @param offset index of the first record in this split
   * @param dummyPath placeholder path required by {@link FileSplit}; never read
   */
  public JdbcInputSplit(long limit, long offset, Path dummyPath) {
    // NOTE: parameters were previously named "start"/"end", which was
    // misleading — the first argument is the record count, the second the
    // starting record index (see the caller in JdbcInputFormat.getSplits).
    super(dummyPath, 0, 0, EMPTY_ARRAY);
    this.setLimit((int) limit);
    this.setOffset((int) offset);
  }


  /**
   * @param limit number of records in this split
   * @param offset index of the first record in this split
   */
  public JdbcInputSplit(int limit, int offset) {
    super((Path) null, 0, 0, EMPTY_ARRAY);
    this.limit = limit;
    this.offset = offset;
  }


  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    out.writeInt(limit);
    out.writeInt(offset);
  }


  @Override
  public void readFields(DataInput in) throws IOException {
    // Must read fields in the exact order write() emitted them.
    super.readFields(in);
    limit = in.readInt();
    offset = in.readInt();
  }


  /** The reported length is the record count, not a byte size. */
  @Override
  public long getLength() {
    return limit;
  }


  /** JDBC data has no placement; always empty. */
  @Override
  public String[] getLocations() throws IOException {
    return EMPTY_ARRAY;
  }


  public int getLimit() {
    return limit;
  }


  public void setLimit(int limit) {
    this.limit = limit;
  }


  public int getOffset() {
    return offset;
  }


  public void setOffset(int offset) {
    this.offset = offset;
  }

}
Loading

0 comments on commit 12b27a3

Please sign in to comment.