Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DRILL-7115: Improve Hive schema show tables performance
1. To make SHOW TABLES for Hive schema work much faster, the additional Drill feature of showing only accessible tables when Storage-Based authorization is enabled was sacrificed. Now the behaviour matches Hive/Beeline: all tables will be shown regardless of accessibility. For details about previous show tables results, check the description of DRILL-540. 2. In HiveDatabaseSchema, implemented a faster getTableNamesAndTypes() method and removed bulk-related code. 3. Deprecated bulk-related options and removed bulk code from AbstractSchema and DrillHiveMetastoreClient. 4. For 8000 Hive tables the query returned in 1.8 seconds; for a combination of 4000 tables and 8000 views the query returned in 2.3 seconds. Note that after the first query table names will be cached and subsequent queries will complete in less than 1 second. 5. Refactored WorkspaceSchemaFactory's getTableNamesAndTypes() method to reuse the existing getViews() method. 6. DrillHiveMetastoreClient was refactored. Classes were unnested and enclosed within the client package with restricted visibility. The cache value types were also updated to avoid unnecessary List to Set back-and-forth conversions. Client creation methods were moved to a separate class, so the new package exposes only the factory and client classes. closes #1706
- Loading branch information
Showing
20 changed files
with
1,058 additions
and
913 deletions.
There are no files selected for viewing
586 changes: 0 additions & 586 deletions
586
...ge-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...e/core/src/main/java/org/apache/drill/exec/store/hive/client/DatabaseNameCacheLoader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.drill.exec.store.hive.client; | ||
|
||
import java.util.List; | ||
|
||
import org.apache.drill.common.AutoCloseables; | ||
import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader; | ||
import org.apache.hadoop.hive.metastore.api.MetaException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* CacheLoader that synchronized on client and tries to reconnect when | ||
* client fails. Used by {@link HiveMetadataCache}. | ||
*/ | ||
final class DatabaseNameCacheLoader extends CacheLoader<String, List<String>> { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(DatabaseNameCacheLoader.class); | ||
|
||
private final DrillHiveMetaStoreClient client; | ||
|
||
DatabaseNameCacheLoader(DrillHiveMetaStoreClient client) { | ||
this.client = client; | ||
} | ||
|
||
@Override | ||
@SuppressWarnings("NullableProblems") | ||
public List<String> load(String key) throws Exception { | ||
synchronized (client) { | ||
try { | ||
return client.getAllDatabases(); | ||
} catch (MetaException e) { | ||
/* | ||
HiveMetaStoreClient is encapsulating both the MetaException/TExceptions inside MetaException. | ||
Since we don't have good way to differentiate, we will close older connection and retry once. | ||
This is only applicable for getAllTables and getAllDatabases method since other methods are | ||
properly throwing correct exceptions. | ||
*/ | ||
logger.warn("Failure while attempting to get hive databases. Retries once.", e); | ||
AutoCloseables.closeSilently(client::close); | ||
try { | ||
/* | ||
Attempt to reconnect. If this is a secure connection, this will fail due | ||
to the invalidation of the security token. In that case, throw the original | ||
exception and let a higher level clean up. Ideally we'd get a new token | ||
here, but doing so requires the use of a different connection, and that | ||
one has also become invalid. This code needs a rework; this is just a | ||
work-around. | ||
*/ | ||
client.reconnect(); | ||
} catch (Exception e1) { | ||
throw e; | ||
} | ||
return client.getAllDatabases(); | ||
} | ||
} | ||
} | ||
|
||
} |
106 changes: 106 additions & 0 deletions
106
.../core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClient.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.drill.exec.store.hive.client; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import org.apache.calcite.schema.Schema.TableType; | ||
import org.apache.drill.exec.store.hive.HiveReadEntry; | ||
import org.apache.hadoop.hive.conf.HiveConf; | ||
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; | ||
import org.apache.hadoop.hive.metastore.api.MetaException; | ||
import org.apache.thrift.TException; | ||
|
||
/** | ||
* Extension of HiveMetaStoreClient with addition of cache and methods useful | ||
* for Drill schema. Note, that access to parent class is synchronized either | ||
* on cache loading level or in overridden methods, and the synchronization | ||
* should not be neglected in child classes. | ||
*/ | ||
public class DrillHiveMetaStoreClient extends HiveMetaStoreClient { | ||
|
||
/** | ||
* Unified API for work with HiveMetaStoreClient | ||
* client through local caches. | ||
*/ | ||
private final HiveMetadataCache hiveMetadataCache; | ||
|
||
/** | ||
* Package visibility performs two roles here: | ||
* 1) ensure that child classes in same package; | ||
* 2) ensure that instances published to other packages | ||
* by {@link DrillHiveMetaStoreClientFactory}. | ||
* | ||
* @param hiveConf hive conf from storage plugin | ||
* @throws MetaException when initialization failed | ||
*/ | ||
DrillHiveMetaStoreClient(final HiveConf hiveConf) throws MetaException { | ||
super(hiveConf); | ||
hiveMetadataCache = new HiveMetadataCache(this, hiveConf); | ||
} | ||
|
||
/** | ||
* Lists all Hive database names. | ||
* | ||
* @param ignoreAuthzErrors whether authorization errors should be ignored | ||
* @return list of Hive databases | ||
* @throws TException when client fails | ||
*/ | ||
public List<String> getDatabases(boolean ignoreAuthzErrors) throws TException { | ||
return hiveMetadataCache.getDbNames(); | ||
} | ||
|
||
/** | ||
* Returns table metadata for concrete table | ||
* | ||
* @param dbName name of database | ||
* @param tableName name of table | ||
* @return {@link HiveReadEntry} containing table meta like columns, partitions etc. | ||
* @throws TException when client fails | ||
*/ | ||
public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException { | ||
return hiveMetadataCache.getHiveReadEntry(dbName, tableName); | ||
} | ||
|
||
/** | ||
* Returns collection of view and table names along with their types. | ||
* | ||
* @param dbName name of database | ||
* @param ignoreAuthzErrors hint for handling authorization errors | ||
* @return map where keys are db object names values are types (VIEW or TABLE) | ||
* @throws TException in case when if loader thrown ExecutionException | ||
*/ | ||
public Map<String, TableType> getTableNamesAndTypes(final String dbName, boolean ignoreAuthzErrors) throws TException { | ||
return hiveMetadataCache.getTableNamesAndTypes(dbName); | ||
} | ||
|
||
/** | ||
* Overridden to enforce synchronization. | ||
* | ||
* @param owner the intended owner for the token | ||
* @param renewerKerberosPrincipalName kerberos user | ||
* @return the string of the token | ||
* @throws TException when client fails | ||
*/ | ||
@Override | ||
public synchronized String getDelegationToken(String owner, String renewerKerberosPrincipalName) throws TException { | ||
return super.getDelegationToken(owner, renewerKerberosPrincipalName); | ||
} | ||
|
||
} |
104 changes: 104 additions & 0 deletions
104
...rc/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.drill.exec.store.hive.client; | ||
|
||
import java.io.IOException; | ||
import java.security.PrivilegedExceptionAction; | ||
|
||
import org.apache.drill.common.exceptions.DrillRuntimeException; | ||
import org.apache.drill.exec.util.ImpersonationUtil; | ||
import org.apache.hadoop.hive.conf.HiveConf; | ||
import org.apache.hadoop.hive.metastore.api.MetaException; | ||
import org.apache.hadoop.hive.shims.Utils; | ||
import org.apache.hadoop.security.UserGroupInformation; | ||
|
||
/** | ||
* Provides factory methods for initialization of {@link DrillHiveMetaStoreClient} instances. | ||
*/ | ||
public final class DrillHiveMetaStoreClientFactory { | ||
|
||
private DrillHiveMetaStoreClientFactory() { | ||
} | ||
|
||
/** | ||
* Create a DrillHiveMetaStoreClient for cases where: | ||
* 1. Drill impersonation is enabled and | ||
* 2. either storage (in remote HiveMetaStore server) or SQL standard based authorization (in Hive storage plugin) | ||
* is enabled | ||
* | ||
* @param processUserMetaStoreClient MetaStoreClient of process user. Useful for generating the delegation tokens when | ||
* SASL (KERBEROS or custom SASL implementations) is enabled. | ||
* @param hiveConf Conf including authorization configuration | ||
* @param userName User who is trying to access the Hive metadata | ||
* @return instance of client | ||
*/ | ||
public static DrillHiveMetaStoreClient createClientWithAuthz(final DrillHiveMetaStoreClient processUserMetaStoreClient, | ||
final HiveConf hiveConf, final String userName) { | ||
try { | ||
boolean delegationTokenGenerated = false; | ||
|
||
final UserGroupInformation ugiForRpc; // UGI credentials to use for RPC communication with Hive MetaStore server | ||
if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { | ||
// If the user impersonation is disabled in Hive storage plugin (not Drill impersonation), use the process | ||
// user UGI credentials. | ||
ugiForRpc = ImpersonationUtil.getProcessUserUGI(); | ||
} else { | ||
ugiForRpc = ImpersonationUtil.createProxyUgi(userName); | ||
if (hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL)) { | ||
// When SASL is enabled for proxy user create a delegation token. Currently HiveMetaStoreClient can create | ||
// client transport for proxy users only when the authentication mechanims is DIGEST (through use of | ||
// delegation tokens). | ||
String delegationToken = processUserMetaStoreClient.getDelegationToken(userName, userName); | ||
try { | ||
Utils.setTokenStr(ugiForRpc, delegationToken, DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN); | ||
} catch (IOException e) { | ||
throw new DrillRuntimeException("Couldn't setup delegation token in the UGI for Hive MetaStoreClient", e); | ||
} | ||
delegationTokenGenerated = true; | ||
} | ||
} | ||
|
||
final HiveConf hiveConfForClient; | ||
if (delegationTokenGenerated) { | ||
hiveConfForClient = new HiveConf(hiveConf); | ||
hiveConfForClient.set("hive.metastore.token.signature", DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN); | ||
} else { | ||
hiveConfForClient = hiveConf; | ||
} | ||
|
||
return ugiForRpc.doAs((PrivilegedExceptionAction<DrillHiveMetaStoreClient>) | ||
() -> new DrillHiveMetaStoreClientWithAuthorization(hiveConfForClient, ugiForRpc, userName)); | ||
} catch (final Exception e) { | ||
throw new DrillRuntimeException("Failure setting up HiveMetaStore client.", e); | ||
} | ||
} | ||
|
||
/** | ||
* Create a DrillMetaStoreClient that can be shared across multiple users. This is created when impersonation is | ||
* disabled. | ||
* | ||
* @param hiveConf hive properties set in Drill storage plugin | ||
* @return instance of client | ||
* @throws MetaException when initialization failed | ||
*/ | ||
public static DrillHiveMetaStoreClient createCloseableClientWithCaching(final HiveConf hiveConf) | ||
throws MetaException { | ||
return new DrillHiveMetaStoreClient(hiveConf); | ||
} | ||
|
||
} |
Oops, something went wrong.