Skip to content

Commit

Permalink
IMPALA-5741: Initial support for reading tiny RDBMS tables
Browse files Browse the repository at this point in the history
This patch uses the "external data source" mechanism in Impala to
implement a data source for querying RDBMS tables over JDBC.
It has some limitations due to the restrictions of "external data
source":
  - It is not distributed, i.e., the fragment is unpartitioned. The
    queries are executed on the coordinator.
  - Queries that read the following data types from external JDBC tables
    are not supported:
    BINARY, CHAR, DATETIME, and COMPLEX.
  - Only binary predicates with the operators =, !=, <=, >=, <, > can be
    pushed down to the RDBMS.
  - The following data types are not supported in predicates:
    DECIMAL, TIMESTAMP, DATE, and BINARY.
  - External tables with columns of complex types are not supported.
  - Support is limited to the following databases:
    MySQL, Postgres, Oracle, MSSQL, H2, DB2, and JETHRO_DATA.
  - Catalog V2 is not supported (IMPALA-7131).
  - DataSource objects are not persistent (IMPALA-12375).

Additional fixes are planned on top of this patch.

Source files under jdbc/conf, jdbc/dao and jdbc/exception are
replicated from Hive JDBC Storage Handler.

In order to query the RDBMS tables, the following steps should be
followed (note that any existing data source tables will be rebuilt):
1. Make sure the Impala cluster has been started.

2. Copy the jar files of JDBC drivers and the data source library into
HDFS.
${IMPALA_HOME}/testdata/bin/copy-ext-data-sources.sh

3. Create an `alltypes` table in the Postgres database.
${IMPALA_HOME}/testdata/bin/load-ext-data-sources.sh

4. Create data source tables (alltypes_jdbc_datasource and
alltypes_jdbc_datasource_2).
${IMPALA_HOME}/bin/impala-shell.sh -f\
  ${IMPALA_HOME}/testdata/bin/create-ext-data-source-table.sql

5. Queries can now be run against the data source tables created in
the previous step. There is no need to restart the Impala cluster.

Testing:
 - Added unit tests for Postgres and ran them with JDBC driver
   postgresql-42.5.1.jar.
 - Manually ran the unit tests for MySQL with JDBC driver
   mysql-connector-j-8.1.0.jar.
 - Ran core tests successfully.

Change-Id: I8244e978c7717c6f1452f66f1630b6441392e7d2
Reviewed-on: http://gerrit.cloudera.org:8080/17842
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
Reviewed-by: Kurt Deschler <kdeschle@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
  • Loading branch information
chufucun authored and Impala Public Jenkins committed Oct 10, 2023
1 parent 8b2598c commit c2bd30a
Show file tree
Hide file tree
Showing 33 changed files with 2,469 additions and 67 deletions.
1 change: 1 addition & 0 deletions bin/rat_exclude_files.txt
Expand Up @@ -136,6 +136,7 @@ fe/src/test/resources/hbase-jaas-client.conf.template
fe/src/test/resources/hbase-jaas-server.conf.template
fe/src/test/resources/users.ldif
java/.mvn/maven.config
java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/README.md
java/toolchains.xml.tmpl
testdata/AllTypesError/*.txt
testdata/AllTypesErrorNoNulls/*.txt
Expand Down
Expand Up @@ -18,6 +18,7 @@
package org.apache.impala.extdatasource;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.URL;
import java.net.URLClassLoader;
Expand Down Expand Up @@ -83,6 +84,9 @@ public class ExternalDataSourceExecutor {
// Protects cachedClasses_, numClassCacheHits_, and numClassCacheMisses_.
private final static Object cachedClassesLock_ = new Object();

// Set up by the constructor and cleared by release().
private URLClassLoader classLoader_;

private final ApiVersion apiVersion_;
private final ExternalDataSource dataSource_;
private final String jarPath_;
Expand Down Expand Up @@ -156,6 +160,8 @@ private Class<?> getDataSourceClass() throws Exception {
// Only cache the class if the init string starts with CACHE_CLASS_PREFIX
if (initString_ != null && initString_.startsWith(CACHE_CLASS_PREFIX)) {
cachedClasses_.put(cacheMapKey, c);
} else {
classLoader_ = loader;
}
if (LOG.isTraceEnabled()) {
LOG.trace("Loaded jar for class {} at path {}", className_, jarPath_);
Expand All @@ -168,6 +174,27 @@ private Class<?> getDataSourceClass() throws Exception {
return c;
}

/**
 * Calls release() when this object is finalized, so the class loader held by
 * this executor is closed even if release() was never invoked explicitly.
 *
 * @throws Throwable if release() or the superclass finalizer throws
 */
@Override
protected void finalize() throws Throwable {
  try {
    release();
  } finally {
    // Always chain to the superclass finalizer, even if release() throws an
    // unchecked exception.
    super.finalize();
  }
}

/**
 * Closes the URLClassLoader held in classLoader_, if there is one, and clears
 * the field. The loader is only retained when the data source class was not
 * placed in the class cache. An IOException raised while closing is logged
 * and otherwise ignored.
 */
public void release() {
  // Nothing to do when no loader is being held.
  if (classLoader_ == null) return;

  try {
    classLoader_.close();
  } catch (IOException e) {
    // Best effort: report the failure and carry on.
    LOG.warn("Error closing the URLClassloader.", e);
  }
  classLoader_ = null;
}

public byte[] prepare(byte[] thriftParams) throws ImpalaException {
TPrepareParams params = new TPrepareParams();
JniUtil.deserializeThrift(protocolFactory_, params, thriftParams);
Expand Down
6 changes: 5 additions & 1 deletion fe/src/test/java/org/apache/impala/service/FrontendTest.java
Expand Up @@ -143,13 +143,17 @@ public void TestGetTablesTypeTable() throws ImpalaException {
// HiveServer2 GetTables has 5 columns.
assertEquals(5, resp.schema.columns.size());
assertEquals(5, resp.rows.get(0).colVals.size());
assertEquals(3, resp.rows.size());
assertEquals(5, resp.rows.size());
assertEquals("alltypes_datasource",
resp.rows.get(0).colVals.get(2).string_val.toLowerCase());
assertEquals("alltypes_date_partition",
resp.rows.get(1).colVals.get(2).string_val.toLowerCase());
assertEquals("alltypes_date_partition_2",
resp.rows.get(2).colVals.get(2).string_val.toLowerCase());
assertEquals("alltypes_jdbc_datasource",
resp.rows.get(3).colVals.get(2).string_val.toLowerCase());
assertEquals("alltypes_jdbc_datasource_2",
resp.rows.get(4).colVals.get(2).string_val.toLowerCase());
}

@Test
Expand Down
84 changes: 84 additions & 0 deletions java/ext-data-source/jdbc/pom.xml
@@ -0,0 +1,84 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Maven build descriptor for the JDBC external data source library
(artifact impala-data-source-jdbc), packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits configuration from the impala-data-source parent POM. -->
<parent>
<groupId>org.apache.impala</groupId>
<artifactId>impala-data-source</artifactId>
<version>4.4.0-SNAPSHOT</version>
</parent>
<artifactId>impala-data-source-jdbc</artifactId>
<name>Apache Impala External Data Source JDBC Library</name>
<description>JDBC External Data Source</description>
<packaging>jar</packaging>
<url>.</url>

<!-- Versions of the third-party dependencies declared below. -->
<properties>
<commons-dbcp2.version>2.9.0</commons-dbcp2.version>
<h2database.version>1.3.166</h2database.version>
</properties>

<dependencies>
<!-- Impala modules use "provided" scope: they are on the compile classpath
but are not packaged with this artifact. -->
<dependency>
<groupId>org.apache.impala</groupId>
<artifactId>impala-data-source-api</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.impala</groupId>
<artifactId>impala-frontend</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<!-- Apache Commons DBCP2 (database connection pooling); no scope is given,
so Maven's default compile scope applies. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>${commons-dbcp2.version}</version>
</dependency>
<!-- Test-only dependencies. -->
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>${h2database.version}</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): junit.version is not defined in this file; presumably it is
inherited from the parent POM. Confirm. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Compile with Java 8 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

0 comments on commit c2bd30a

Please sign in to comment.