From cf51aa7176ee6cc285af6aa2039ba0774afaa4f0 Mon Sep 17 00:00:00 2001
From: Igor Guzenko
Date: Tue, 19 Mar 2019 21:11:41 +0200
Subject: [PATCH] DRILL-7115: Improve Hive schema show tables performance

1. To make SHOW TABLES for a Hive schema work much faster, the additional Drill feature of showing only accessible tables when Storage-Based authorization is enabled was sacrificed. The behavior now matches Hive/Beeline: all tables are shown regardless of accessibility. For details about the previous show tables results, check the description of DRILL-540.
2. Implemented a faster getTableNamesAndTypes() method in HiveDatabaseSchema and removed the bulk-related code.
3. Deprecated the bulk-related options and removed bulk code from AbstractSchema and DrillHiveMetastoreClient.
4. For 8000 Hive tables the query returned in 1.8 seconds; for a combination of 4000 tables and 8000 views it returned in 2.3 seconds. Note that after the first query the table names are cached, so subsequent queries complete in less than 1 second.
5. Refactored WorkspaceSchemaFactory's getTableNamesAndTypes() method to reuse the existing getViews() method.
6. Refactored DrillHiveMetastoreClient. The nested classes were extracted and enclosed within a client package with restricted visibility, and the cache value type was updated to avoid unnecessary List-to-Set conversions back and forth. Client creation methods were moved to a separate class, so the new package exposes only the factory and the client class.

closes #1706
---
 .../store/hive/DrillHiveMetaStoreClient.java | 586 ------------------
 .../drill/exec/store/hive/HiveScan.java | 2 +-
 .../drill/exec/store/hive/HiveUtilities.java | 16 +
 .../hive/client/DatabaseNameCacheLoader.java | 75 +++
 .../hive/client/DrillHiveMetaStoreClient.java | 106 ++++
 .../DrillHiveMetaStoreClientFactory.java | 104 ++++
 ...lHiveMetaStoreClientWithAuthorization.java | 110 ++++
 .../{ => client}/HiveAuthorizationHelper.java | 43 +-
 .../store/hive/client/HiveMetadataCache.java | 172 +++++
 .../hive/client/TableEntryCacheLoader.java | 111 ++++
 .../exec/store/hive/client/TableName.java | 72 +++
 .../hive/client/TableNameCacheLoader.java | 94 +++
 .../store/hive/schema/HiveDatabaseSchema.java | 108 +---
 .../store/hive/schema/HiveSchemaFactory.java | 9 +-
 .../TestStorageBasedHiveAuthorization.java | 183 +++---
 .../org/apache/drill/exec/ExecConstants.java | 32 +-
 .../planner/sql/parser/DrillParserUtil.java | 14 +-
 .../drill/exec/store/AbstractSchema.java | 59 +-
 .../store/dfs/WorkspaceSchemaFactory.java | 46 +-
 .../ischema/InfoSchemaRecordGenerator.java | 29 +-
 20 files changed, 1058 insertions(+), 913 deletions(-)
 delete mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
 create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DatabaseNameCacheLoader.java
 create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClient.java
 create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientFactory.java
 create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientWithAuthorization.java
 rename contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/{ => client}/HiveAuthorizationHelper.java (85%)
 create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/HiveMetadataCache.java
 create mode 100644
contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableEntryCacheLoader.java create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java create mode 100644 contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableNameCacheLoader.java diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java deleted file mode 100644 index 588ca1170c8..00000000000 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java +++ /dev/null @@ -1,586 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.store.hive; - -import org.apache.drill.shaded.guava.com.google.common.base.Strings; -import org.apache.drill.shaded.guava.com.google.common.cache.CacheBuilder; -import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader; -import org.apache.drill.shaded.guava.com.google.common.cache.LoadingCache; -import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; -import org.apache.drill.shaded.guava.com.google.common.collect.Lists; -import org.apache.drill.common.exceptions.DrillRuntimeException; -import org.apache.drill.common.exceptions.UserException; -import org.apache.drill.exec.util.ImpersonationUtil; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.thrift.TException; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; - -/** - * Override HiveMetaStoreClient to provide additional capabilities such as caching, reconnecting with user - * credentials and higher level APIs to get 
the metadata in form that Drill needs directly. - */ -public abstract class DrillHiveMetaStoreClient extends HiveMetaStoreClient { - private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillHiveMetaStoreClient.class); - - public final String HIVE_METASTORE_CACHE_TTL = "hive.metastore.cache-ttl-seconds"; - public final String HIVE_METASTORE_CACHE_EXPIRE = "hive.metastore.cache-expire-after"; - public final String HIVE_METASTORE_CACHE_EXPIRE_AFTER_WRITE = "write"; - public final String HIVE_METASTORE_CACHE_EXPIRE_AFTER_ACCESS = "access"; - - protected final LoadingCache> databases; - protected final LoadingCache> tableNameLoader; - protected final LoadingCache tableLoaders; - - - /** - * Create a DrillHiveMetaStoreClient for cases where: - * 1. Drill impersonation is enabled and - * 2. either storage (in remote HiveMetaStore server) or SQL standard based authorization (in Hive storage plugin) - * is enabled - * @param processUserMetaStoreClient MetaStoreClient of process user. Useful for generating the delegation tokens when - * SASL (KERBEROS or custom SASL implementations) is enabled. - * @param hiveConf Conf including authorization configuration - * @param userName User who is trying to access the Hive metadata - * @return - * @throws MetaException - */ - public static DrillHiveMetaStoreClient createClientWithAuthz(final DrillHiveMetaStoreClient processUserMetaStoreClient, - final HiveConf hiveConf, final String userName) throws MetaException { - try { - boolean delegationTokenGenerated = false; - - final UserGroupInformation ugiForRpc; // UGI credentials to use for RPC communication with Hive MetaStore server - if (!hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { - // If the user impersonation is disabled in Hive storage plugin (not Drill impersonation), use the process - // user UGI credentials. - ugiForRpc = ImpersonationUtil.getProcessUserUGI(); - } else { - ugiForRpc = ImpersonationUtil.createProxyUgi(userName); - if (hiveConf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL)) { - // When SASL is enabled for proxy user create a delegation token. Currently HiveMetaStoreClient can create - // client transport for proxy users only when the authentication mechanims is DIGEST (through use of - // delegation tokens). - String delegationToken = processUserMetaStoreClient.getDelegationToken(userName, userName); - try { - Utils.setTokenStr(ugiForRpc, delegationToken, HiveClientWithAuthzWithCaching.DRILL2HMS_TOKEN); - } catch (IOException e) { - throw new DrillRuntimeException("Couldn't setup delegation token in the UGI for Hive MetaStoreClient", e); - } - delegationTokenGenerated = true; - } - } - - final HiveConf hiveConfForClient; - if (delegationTokenGenerated) { - hiveConfForClient = new HiveConf(hiveConf); - hiveConfForClient.set("hive.metastore.token.signature", HiveClientWithAuthzWithCaching.DRILL2HMS_TOKEN); - } else { - hiveConfForClient = hiveConf; - } - - return ugiForRpc.doAs(new PrivilegedExceptionAction() { - @Override - public DrillHiveMetaStoreClient run() throws Exception { - return new HiveClientWithAuthzWithCaching(hiveConfForClient, ugiForRpc, userName); - } - }); - } catch (final Exception e) { - throw new DrillRuntimeException("Failure setting up HiveMetaStore client.", e); - } - } - - /** - * Create a DrillMetaStoreClient that can be shared across multiple users. This is created when impersonation is - * disabled. 
- * @param hiveConf - * @return - * @throws MetaException - */ - public static DrillHiveMetaStoreClient createCloseableClientWithCaching(final HiveConf hiveConf) - throws MetaException { - return new HiveClientWithCaching(hiveConf); - } - - private DrillHiveMetaStoreClient(final HiveConf hiveConf) throws MetaException { - super(hiveConf); - - int hmsCacheTTL = 60; // default is 60 seconds - boolean expireAfterWrite = true; // default is expire after write. - - final String ttl = hiveConf.get(HIVE_METASTORE_CACHE_TTL); - if (!Strings.isNullOrEmpty(ttl)) { - hmsCacheTTL = Integer.valueOf(ttl); - logger.warn("Hive metastore cache ttl is set to {} seconds.", hmsCacheTTL); - } - - final String expiry = hiveConf.get(HIVE_METASTORE_CACHE_EXPIRE); - if (!Strings.isNullOrEmpty(expiry)) { - if (expiry.equalsIgnoreCase(HIVE_METASTORE_CACHE_EXPIRE_AFTER_WRITE)) { - expireAfterWrite = true; - } else if (expiry.equalsIgnoreCase(HIVE_METASTORE_CACHE_EXPIRE_AFTER_ACCESS)) { - expireAfterWrite = false; - } - logger.warn("Hive metastore cache expire policy is set to {}", expireAfterWrite? "expireAfterWrite" : "expireAfterAccess"); - } - - final CacheBuilder cacheBuilder = CacheBuilder - .newBuilder(); - - if (expireAfterWrite) { - cacheBuilder.expireAfterWrite(hmsCacheTTL, TimeUnit.SECONDS); - } else { - cacheBuilder.expireAfterAccess(hmsCacheTTL, TimeUnit.SECONDS); - } - - databases = cacheBuilder.build(new DatabaseLoader()); - tableNameLoader = cacheBuilder.build(new TableNameLoader()); - tableLoaders = cacheBuilder.build(new TableLoader()); - - } - - /** - * Higher level API that returns the databases in Hive. - * @return - * @throws TException - */ - public abstract List getDatabases(boolean ignoreAuthzErrors) throws TException; - - /** - * Higher level API that returns the tables in given database. - * @param dbName - * @return - * @throws TException - */ - public abstract List getTableNames(final String dbName, boolean ignoreAuthzErrors) throws TException; - - /** - * Higher level API that returns the {@link HiveReadEntry} for given database and table. - * @param dbName - * @param tableName - * @return - * @throws TException - */ - public abstract HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException; - - /** Helper method which gets database. Retries once if the first call to fetch the metadata fails */ - protected static List getDatabasesHelper(final IMetaStoreClient mClient) throws TException { - try { - return mClient.getAllDatabases(); - } catch (MetaException e) { - /* - HiveMetaStoreClient is encapsulating both the MetaException/TExceptions inside MetaException. - Since we don't have good way to differentiate, we will close older connection and retry once. - This is only applicable for getAllTables and getAllDatabases method since other methods are - properly throwing correct exceptions. - */ - logger.warn("Failure while attempting to get hive databases. Retries once.", e); - try { - mClient.close(); - } catch (Exception ex) { - logger.warn("Failure while attempting to close existing hive metastore connection. May leak connection.", ex); - } - - // Attempt to reconnect. If this is a secure connection, this will fail due - // to the invalidation of the security token. In that case, throw the original - // exception and let a higher level clean up. Ideally we'd get a new token - // here, but doing so requires the use of a different connection, and that - // one has also become invalid. 
This code needs a rework; this is just a - // work-around. - - try { - mClient.reconnect(); - } catch (Exception e1) { - throw e; - } - return mClient.getAllDatabases(); - } - } - - /** Helper method which gets tables in a database. Retries once if the first call to fetch the metadata fails */ - protected static List getTableNamesHelper(final IMetaStoreClient mClient, final String dbName) - throws TException { - try { - return mClient.getAllTables(dbName); - } catch (MetaException e) { - /* - HiveMetaStoreClient is encapsulating both the MetaException/TExceptions inside MetaException. - Since we don't have good way to differentiate, we will close older connection and retry once. - This is only applicable for getAllTables and getAllDatabases method since other methods are - properly throwing correct exceptions. - */ - logger.warn("Failure while attempting to get hive tables. Retries once.", e); - try { - mClient.close(); - } catch (Exception ex) { - logger.warn("Failure while attempting to close existing hive metastore connection. May leak connection.", ex); - } - mClient.reconnect(); - return mClient.getAllTables(dbName); - } - } - - public static List getTablesByNamesByBulkLoadHelper( - final HiveMetaStoreClient mClient, final List tableNames, final String schemaName, - final int bulkSize) { - final int totalTables = tableNames.size(); - final List tables = Lists.newArrayList(); - - // In each round, Drill asks for a sub-list of all the requested tables - for (int fromIndex = 0; fromIndex < totalTables; fromIndex += bulkSize) { - final int toIndex = Math.min(fromIndex + bulkSize, totalTables); - final List eachBulkofTableNames = tableNames.subList(fromIndex, toIndex); - List eachBulkofTables; - // Retries once if the first call to fetch the metadata fails - try { - eachBulkofTables = DrillHiveMetaStoreClient.getTableObjectsByNameHelper(mClient, schemaName, eachBulkofTableNames); - } catch (Exception e) { - logger.warn("Exception occurred while trying to read tables from {}: {}", schemaName, e.getCause()); - return ImmutableList.of(); - } - tables.addAll(eachBulkofTables); - } - return tables; - } - - /** Helper method which gets table metadata. Retries once if the first call to fetch the metadata fails */ - protected static HiveReadEntry getHiveReadEntryHelper(final IMetaStoreClient mClient, final String dbName, - final String tableName) throws TException { - Table table = null; - try { - table = mClient.getTable(dbName, tableName); - } catch (MetaException | NoSuchObjectException e) { - throw e; - } catch (TException e) { - logger.warn("Failure while attempting to get hive table. Retries once. ", e); - try { - mClient.close(); - } catch (Exception ex) { - logger.warn("Failure while attempting to close existing hive metastore connection. May leak connection.", ex); - } - mClient.reconnect(); - table = mClient.getTable(dbName, tableName); - } - - if (table == null) { - throw new UnknownTableException(String.format("Unable to find table '%s'.", tableName)); - } - - List partitions; - try { - partitions = mClient.listPartitions(dbName, tableName, (short) -1); - } catch (NoSuchObjectException | MetaException e) { - throw e; - } catch (TException e) { - logger.warn("Failure while attempting to get hive partitions. Retries once. ", e); - try { - mClient.close(); - } catch (Exception ex) { - logger.warn("Failure while attempting to close existing hive metastore connection. 
May leak connection.", ex); - } - mClient.reconnect(); - partitions = mClient.listPartitions(dbName, tableName, (short) -1); - } - - List hivePartitionWrappers = Lists.newArrayList(); - HiveTableWithColumnCache hiveTable = new HiveTableWithColumnCache(table, new ColumnListsCache(table)); - for (Partition partition : partitions) { - hivePartitionWrappers.add(createPartitionWithSpecColumns(hiveTable, partition)); - } - - if (hivePartitionWrappers.isEmpty()) { - hivePartitionWrappers = null; - } - - return new HiveReadEntry(new HiveTableWrapper(hiveTable), hivePartitionWrappers); - } - - /** - * Helper method which stores partition columns in table columnListCache. If table columnListCache has exactly the - * same columns as partition, in partition stores columns index that corresponds to identical column list. - * If table columnListCache hasn't such column list, the column list adds to table columnListCache and in partition - * stores columns index that corresponds to column list. - * - * @param table hive table instance - * @param partition partition instance - * @return hive partition wrapper - */ - public static HiveTableWrapper.HivePartitionWrapper createPartitionWithSpecColumns(HiveTableWithColumnCache table, Partition partition) { - int listIndex = table.getColumnListsCache().addOrGet(partition.getSd().getCols()); - HivePartition hivePartition = new HivePartition(partition, listIndex); - HiveTableWrapper.HivePartitionWrapper hivePartitionWrapper = new HiveTableWrapper.HivePartitionWrapper(hivePartition); - return hivePartitionWrapper; - } - - /** - * Help method which gets hive tables for a given schema|DB name and a list of table names. - * Retries once if the first call fails with TExcption other than connection-lost problems. - * @param mClient - * @param schemaName - * @param tableNames - * @return list of hive table instances. - **/ - public static List
getTableObjectsByNameHelper(final HiveMetaStoreClient mClient, final String schemaName, - final List tableNames) throws TException { - try { - return mClient.getTableObjectsByName(schemaName, tableNames); - } catch (MetaException | InvalidOperationException | UnknownDBException e) { - throw e; - } catch (TException e) { - logger.warn("Failure while attempting to get tables by names. Retries once. ", e); - try { - mClient.close(); - } catch (Exception ex) { - logger.warn("Failure while attempting to close existing hive metastore connection. May leak connection.", ex); - } - mClient.reconnect(); - return mClient.getTableObjectsByName(schemaName, tableNames); - } - } - - /** - * HiveMetaStoreClient to create and maintain (reconnection cases) connection to Hive metastore with given user - * credentials and check authorization privileges if set. - */ - private static class HiveClientWithAuthzWithCaching extends DrillHiveMetaStoreClient { - public static final String DRILL2HMS_TOKEN = "DrillDelegationTokenForHiveMetaStoreServer"; - - private final UserGroupInformation ugiForRpc; - private HiveAuthorizationHelper authorizer; - - private HiveClientWithAuthzWithCaching(final HiveConf hiveConf, final UserGroupInformation ugiForRpc, - final String userName) throws TException { - super(hiveConf); - this.ugiForRpc = ugiForRpc; - this.authorizer = new HiveAuthorizationHelper(this, hiveConf, userName); - } - - @Override - public void reconnect() throws MetaException { - try { - ugiForRpc.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - reconnectSuper(); - return null; - } - }); - } catch (final InterruptedException | IOException e) { - throw new DrillRuntimeException("Failed to reconnect to HiveMetaStore: " + e.getMessage(), e); - } - } - - private void reconnectSuper() throws MetaException { - super.reconnect(); - } - - @Override - public List getDatabases(boolean ignoreAuthzErrors) throws TException { - try { - authorizer.authorizeShowDatabases(); - } catch (final HiveAccessControlException e) { - if (ignoreAuthzErrors) { - return Collections.emptyList(); - } - throw UserException.permissionError(e).build(logger); - } - - try { - return databases.get("databases"); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - @Override - public List getTableNames(final String dbName, boolean ignoreAuthzErrors) throws TException { - try { - authorizer.authorizeShowTables(dbName); - } catch (final HiveAccessControlException e) { - if (ignoreAuthzErrors) { - return Collections.emptyList(); - } - throw UserException.permissionError(e).build(logger); - } - - try { - return tableNameLoader.get(dbName); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - @Override - public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException { - try { - authorizer.authorizeReadTable(dbName, tableName); - } catch (final HiveAccessControlException e) { - if (!ignoreAuthzErrors) { - throw UserException.permissionError(e).build(logger); - } - } - - try { - return tableLoaders.get(TableName.table(dbName,tableName)); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - } - - /** - * HiveMetaStoreClient that provides a shared MetaStoreClient implementation with caching. 
- */ - private static class HiveClientWithCaching extends DrillHiveMetaStoreClient { - private HiveClientWithCaching(final HiveConf hiveConf) throws MetaException { - super(hiveConf); - } - - @Override - public List getDatabases(boolean ignoreAuthzErrors) throws TException { - try { - return databases.get("databases"); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - @Override - public List getTableNames(final String dbName, boolean ignoreAuthzErrors) throws TException { - try { - return tableNameLoader.get(dbName); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - @Override - public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException { - try { - return tableLoaders.get(TableName.table(dbName,tableName)); - } catch (final ExecutionException e) { - throw new TException(e); - } - } - - @Override - public String getDelegationToken(String owner, String renewerKerberosPrincipalName) throws TException { - synchronized (this) { - return super.getDelegationToken(owner, renewerKerberosPrincipalName); - } - } - - } - - private class DatabaseLoader extends CacheLoader> { - @Override - public List load(String key) throws Exception { - if (!"databases".equals(key)) { - throw new UnsupportedOperationException(); - } - synchronized (DrillHiveMetaStoreClient.this) { - return getDatabasesHelper(DrillHiveMetaStoreClient.this); - } - } - } - - private class TableNameLoader extends CacheLoader> { - @Override - public List load(String dbName) throws Exception { - synchronized (DrillHiveMetaStoreClient.this) { - return getTableNamesHelper(DrillHiveMetaStoreClient.this, dbName); - } - } - } - - private class TableLoader extends CacheLoader { - @Override - public HiveReadEntry load(TableName key) throws Exception { - synchronized (DrillHiveMetaStoreClient.this) { - return getHiveReadEntryHelper(DrillHiveMetaStoreClient.this, key.getDatabaseName(), key.getTableName()); - } - } - } - - static class TableName { - private final String databaseName; - private final String tableName; - - private TableName(String databaseName, String tableName) { - this.databaseName = databaseName; - this.tableName = tableName; - } - - public static TableName table(String databaseName, String tableName) { - return new TableName(databaseName, tableName); - } - - public String getDatabaseName() { - return databaseName; - } - - public String getTableName() { - return tableName; - } - - @Override - public String toString() { - return String.format("databaseName:%s, tableName:%s", databaseName, tableName).toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - TableName other = (TableName) o; - return Objects.equals(databaseName, other.databaseName) && - Objects.equals(tableName, other.tableName); - } - - @Override - public int hashCode() { - return Objects.hash(databaseName, tableName); - } - } - -} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java index dfa2ee39129..c7d04a9e7cc 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java @@ -51,7 +51,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import 
com.fasterxml.jackson.annotation.JsonTypeName; -import static org.apache.drill.exec.store.hive.DrillHiveMetaStoreClient.createPartitionWithSpecColumns; +import static org.apache.drill.exec.store.hive.HiveUtilities.createPartitionWithSpecColumns; @JsonTypeName("hive-scan") public class HiveScan extends AbstractGroupScan { diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java index 40b04f895da..3eb134d2071 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -743,5 +744,20 @@ public static HiveConf generateHiveConf(HiveConf hiveConf, Map p return newHiveConf; } + /** + * Helper method that stores partition columns in the table's columnListCache. If the columnListCache already + * contains exactly the same columns as the partition, the partition stores the index of that identical column + * list. Otherwise the column list is added to the columnListCache, and the partition stores the index of the + * newly added list. + * + * @param table hive table instance + * @param partition partition instance + * @return hive partition wrapper + */ + public static HiveTableWrapper.HivePartitionWrapper createPartitionWithSpecColumns(HiveTableWithColumnCache table, Partition partition) { + int listIndex = table.getColumnListsCache().addOrGet(partition.getSd().getCols()); + return new HiveTableWrapper.HivePartitionWrapper(new HivePartition(partition, listIndex)); + } + } diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DatabaseNameCacheLoader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DatabaseNameCacheLoader.java new file mode 100644 index 00000000000..c017399a3cd --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DatabaseNameCacheLoader.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.drill.exec.store.hive.client; + +import java.util.List; + +import org.apache.drill.common.AutoCloseables; +import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * CacheLoader that synchronizes on the client and tries to reconnect when + * the client fails. Used by {@link HiveMetadataCache}. + */ +final class DatabaseNameCacheLoader extends CacheLoader<String, List<String>> { + + private static final Logger logger = LoggerFactory.getLogger(DatabaseNameCacheLoader.class); + + private final DrillHiveMetaStoreClient client; + + DatabaseNameCacheLoader(DrillHiveMetaStoreClient client) { + this.client = client; + } + + @Override + @SuppressWarnings("NullableProblems") + public List<String> load(String key) throws Exception { + synchronized (client) { + try { + return client.getAllDatabases(); + } catch (MetaException e) { + /* + HiveMetaStoreClient encapsulates both MetaExceptions and TExceptions inside MetaException. + Since we don't have a good way to differentiate, we will close the older connection and retry once. + This is only applicable for the getAllTables and getAllDatabases methods since other methods + properly throw correct exceptions. + */ + logger.warn("Failure while attempting to get hive databases. Retries once.", e); + AutoCloseables.closeSilently(client::close); + try { + /* + Attempt to reconnect. If this is a secure connection, this will fail due + to the invalidation of the security token. In that case, throw the original + exception and let a higher level clean up. Ideally we'd get a new token + here, but doing so requires the use of a different connection, and that + one has also become invalid. This code needs a rework; this is just a + work-around. + */ + client.reconnect(); + } catch (Exception e1) { + throw e; + } + return client.getAllDatabases(); + } + } + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClient.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClient.java new file mode 100644 index 00000000000..dd5cad01165 --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClient.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.drill.exec.store.hive.client; + +import java.util.List; +import java.util.Map; + +import org.apache.calcite.schema.Schema.TableType; +import org.apache.drill.exec.store.hive.HiveReadEntry; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.thrift.TException; + +/** + * Extension of HiveMetaStoreClient with the addition of a cache and methods useful + * for the Drill schema. Note that access to the parent class is synchronized either + * at the cache loading level or in overridden methods, and the synchronization + * should not be neglected in child classes. + */ +public class DrillHiveMetaStoreClient extends HiveMetaStoreClient { + + /** + * Unified API for working with the HiveMetaStoreClient + * through local caches. + */ + private final HiveMetadataCache hiveMetadataCache; + + /** + * Package visibility performs two roles here: + * 1) ensures that child classes are in the same package; + * 2) ensures that instances are published to other packages + * only by {@link DrillHiveMetaStoreClientFactory}. + * + * @param hiveConf hive conf from storage plugin + * @throws MetaException when initialization failed + */ + DrillHiveMetaStoreClient(final HiveConf hiveConf) throws MetaException { + super(hiveConf); + hiveMetadataCache = new HiveMetadataCache(this, hiveConf); + } + + /** + * Lists all Hive database names. + * + * @param ignoreAuthzErrors whether authorization errors should be ignored + * @return list of Hive databases + * @throws TException when client fails + */ + public List<String> getDatabases(boolean ignoreAuthzErrors) throws TException { + return hiveMetadataCache.getDbNames(); + } + + /** + * Returns table metadata for a concrete table. + * + * @param dbName name of database + * @param tableName name of table + * @param ignoreAuthzErrors whether authorization errors should be ignored + * @return {@link HiveReadEntry} containing table meta like columns, partitions etc. + * @throws TException when client fails + */ + public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException { + return hiveMetadataCache.getHiveReadEntry(dbName, tableName); + } + + /** + * Returns collection of view and table names along with their types. + * + * @param dbName name of database + * @param ignoreAuthzErrors hint for handling authorization errors + * @return map where keys are db object names and values are types (VIEW or TABLE) + * @throws TException when the cache loader threw an ExecutionException + */ + public Map<String, TableType> getTableNamesAndTypes(final String dbName, boolean ignoreAuthzErrors) throws TException { + return hiveMetadataCache.getTableNamesAndTypes(dbName); + } + + /** + * Overridden to enforce synchronization.
+ * + * @param owner the intended owner for the token + * @param renewerKerberosPrincipalName kerberos user + * @return the string of the token + * @throws TException when client fails + */ + @Override + public synchronized String getDelegationToken(String owner, String renewerKerberosPrincipalName) throws TException { + return super.getDelegationToken(owner, renewerKerberosPrincipalName); + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientFactory.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientFactory.java new file mode 100644 index 00000000000..f392ba3d113 --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientFactory.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.client; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; + +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.exec.util.ImpersonationUtil; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * Provides factory methods for initialization of {@link DrillHiveMetaStoreClient} instances. + */ +public final class DrillHiveMetaStoreClientFactory { + + private DrillHiveMetaStoreClientFactory() { + } + + /** + * Create a DrillHiveMetaStoreClient for cases where: + * 1. Drill impersonation is enabled and + * 2. either storage (in remote HiveMetaStore server) or SQL standard based authorization (in Hive storage plugin) + * is enabled + * + * @param processUserMetaStoreClient MetaStoreClient of process user. Useful for generating the delegation tokens when + * SASL (KERBEROS or custom SASL implementations) is enabled. + * @param hiveConf Conf including authorization configuration + * @param userName User who is trying to access the Hive metadata + * @return instance of client + */ + public static DrillHiveMetaStoreClient createClientWithAuthz(final DrillHiveMetaStoreClient processUserMetaStoreClient, + final HiveConf hiveConf, final String userName) { + try { + boolean delegationTokenGenerated = false; + + final UserGroupInformation ugiForRpc; // UGI credentials to use for RPC communication with Hive MetaStore server + if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { + // If the user impersonation is disabled in Hive storage plugin (not Drill impersonation), use the process + // user UGI credentials. 
+ ugiForRpc = ImpersonationUtil.getProcessUserUGI(); + } else { + ugiForRpc = ImpersonationUtil.createProxyUgi(userName); + if (hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL)) { + // When SASL is enabled for the proxy user, create a delegation token. Currently HiveMetaStoreClient can create + // client transport for proxy users only when the authentication mechanism is DIGEST (through use of + // delegation tokens). + String delegationToken = processUserMetaStoreClient.getDelegationToken(userName, userName); + try { + Utils.setTokenStr(ugiForRpc, delegationToken, DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN); + } catch (IOException e) { + throw new DrillRuntimeException("Couldn't setup delegation token in the UGI for Hive MetaStoreClient", e); + } + delegationTokenGenerated = true; + } + } + + final HiveConf hiveConfForClient; + if (delegationTokenGenerated) { + hiveConfForClient = new HiveConf(hiveConf); + hiveConfForClient.set("hive.metastore.token.signature", DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN); + } else { + hiveConfForClient = hiveConf; + } + + return ugiForRpc.doAs((PrivilegedExceptionAction<DrillHiveMetaStoreClient>) + () -> new DrillHiveMetaStoreClientWithAuthorization(hiveConfForClient, ugiForRpc, userName)); + } catch (final Exception e) { + throw new DrillRuntimeException("Failure setting up HiveMetaStore client.", e); + } + } + + /** + * Create a DrillHiveMetaStoreClient that can be shared across multiple users. This is created when impersonation is + * disabled. + * + * @param hiveConf hive properties set in Drill storage plugin + * @return instance of client + * @throws MetaException when initialization failed + */ + public static DrillHiveMetaStoreClient createCloseableClientWithCaching(final HiveConf hiveConf) + throws MetaException { + return new DrillHiveMetaStoreClient(hiveConf); + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientWithAuthorization.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientWithAuthorization.java new file mode 100644 index 00000000000..dbb5772c645 --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/DrillHiveMetaStoreClientWithAuthorization.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.drill.exec.store.hive.client; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.schema.Schema; +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.exec.store.hive.HiveReadEntry; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Provides authorization verifications when SQL Standard Based authorization is + * enabled. In the other case (Storage Based authorization) the Hive client itself won't allow + * unauthorized access for reading tables. + */ +class DrillHiveMetaStoreClientWithAuthorization extends DrillHiveMetaStoreClient { + + private static final Logger logger = LoggerFactory.getLogger(DrillHiveMetaStoreClientWithAuthorization.class); + + static final String DRILL2HMS_TOKEN = "DrillDelegationTokenForHiveMetaStoreServer"; + + private final UserGroupInformation ugiForRpc; + + private final HiveAuthorizationHelper authorizer; + + DrillHiveMetaStoreClientWithAuthorization(final HiveConf hiveConf, final UserGroupInformation ugiForRpc, + final String userName) throws TException { + super(hiveConf); + this.ugiForRpc = ugiForRpc; + this.authorizer = new HiveAuthorizationHelper(this, hiveConf, userName); + } + + @Override + public List<String> getDatabases(boolean ignoreAuthzErrors) throws TException { + try { + authorizer.authorizeShowDatabases(); + } catch (final HiveAccessControlException e) { + if (ignoreAuthzErrors) { + return Collections.emptyList(); + } + throw UserException.permissionError(e).build(logger); + } + return super.getDatabases(ignoreAuthzErrors); + } + + @Override + public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException { + try { + authorizer.authorizeReadTable(dbName, tableName); + } catch (final HiveAccessControlException e) { + if (!ignoreAuthzErrors) { + throw UserException.permissionError(e).build(logger); + } + } + return super.getHiveReadEntry(dbName, tableName, ignoreAuthzErrors); + } + + @Override + public Map<String, Schema.TableType> getTableNamesAndTypes(String dbName, boolean ignoreAuthzErrors) throws TException { + try { + authorizer.authorizeShowTables(dbName); + } catch (final HiveAccessControlException e) { + if (ignoreAuthzErrors) { + return Collections.emptyMap(); + } + throw UserException.permissionError(e).build(logger); + } + return super.getTableNamesAndTypes(dbName, ignoreAuthzErrors); + } + + @Override + public void reconnect() throws MetaException { + try { + ugiForRpc.doAs((PrivilegedExceptionAction<Void>) () -> { + super.reconnect(); + return null; + }); + } catch (final InterruptedException | IOException e) { + throw new DrillRuntimeException("Failed to reconnect to HiveMetaStore: " + e.getMessage(), e); + } + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveAuthorizationHelper.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/HiveAuthorizationHelper.java similarity index 85% rename from
contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveAuthorizationHelper.java rename to contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/HiveAuthorizationHelper.java index 29f345fe0a9..e2dad5a106c 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveAuthorizationHelper.java +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/HiveAuthorizationHelper.java @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.exec.store.hive; +package org.apache.drill.exec.store.hive.client; + +import java.util.Collections; +import java.util.List; + -import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -29,29 +32,35 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.session.SessionState; -import java.util.Collections; -import java.util.List; - /** * Helper class for initializing and checking privileges according to authorization configuration set in Hive storage * plugin config. */ -public class HiveAuthorizationHelper { +class HiveAuthorizationHelper { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveAuthorizationHelper.class); + /** + * Marks that SQLStdBasedAuthorization is enabled. When set, some operations on + * {@link DrillHiveMetaStoreClientWithAuthorization} will use + * {@link HiveAuthorizationHelper#authorizerV2} to authorize an action before + * making a request to the MetaStore client. + */ final boolean authzEnabled; + + /** + * Authorizer used when SQLStdBasedAuthorization is enabled.
+ */ final HiveAuthorizer authorizerV2; - public HiveAuthorizationHelper(final IMetaStoreClient mClient, final HiveConf hiveConf, final String user) { + HiveAuthorizationHelper(final IMetaStoreClient mClient, final HiveConf hiveConf, final String user) { authzEnabled = hiveConf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED); if (!authzEnabled) { authorizerV2 = null; @@ -76,12 +85,7 @@ public HiveAuthorizationHelper(final IMetaStoreClient mClient, final HiveConf hi authzContextBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); // Drill is emulating HS2 here authorizerV2 = authorizerFactory.createHiveAuthorizer( - new HiveMetastoreClientFactory() { - @Override - public IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException { - return mClient; - } - }, + () -> mClient, hiveConf, authenticator, authzContextBuilder.build()); authorizerV2.applyAuthorizationConfigPolicy(hiveConfCopy); @@ -96,7 +100,7 @@ public IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException * Check authorization for "SHOW DATABASES" command. A {@link HiveAccessControlException} is thrown * for illegal access. */ - public void authorizeShowDatabases() throws HiveAccessControlException { + void authorizeShowDatabases() throws HiveAccessControlException { if (!authzEnabled) { return; } @@ -109,7 +113,7 @@ public void authorizeShowDatabases() throws HiveAccessControlException { * for illegal access. * @param dbName */ - public void authorizeShowTables(final String dbName) throws HiveAccessControlException { + void authorizeShowTables(final String dbName) throws HiveAccessControlException { if (!authzEnabled) { return; } @@ -125,7 +129,7 @@ public void authorizeShowTables(final String dbName) throws HiveAccessControlExc * @param dbName * @param tableName */ - public void authorizeReadTable(final String dbName, final String tableName) throws HiveAccessControlException { + void authorizeReadTable(final String dbName, final String tableName) throws HiveAccessControlException { if (!authzEnabled) { return; } @@ -136,7 +140,7 @@ public void authorizeReadTable(final String dbName, final String tableName) thro /* Helper method to check privileges */ private void authorize(final HiveOperationType hiveOpType, final List<HivePrivilegeObject> toRead, - final List<HivePrivilegeObject> toWrite, final String cmd) throws HiveAccessControlException { + final List<HivePrivilegeObject> toWrite, final String cmd) throws HiveAccessControlException { try { HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); authzContextBuilder.setUserIpAddress("Not available"); @@ -149,4 +153,5 @@ private void authorize(final HiveOperationType hiveOpType, final List> dbNamesCache; + + /** + * Cache where each key is a database name and the value is a map + * from table or view name to its type, + * {@link Schema.TableType#TABLE} or {@link Schema.TableType#VIEW} + * accordingly. + */ + private final LoadingCache<String, Map<String, TableType>> tableNamesCache; + + /** + * Cache where each key is a combination of database and table names and + * the value is an entry containing the Hive table metadata. + */ + private final LoadingCache<TableName, HiveReadEntry> tableEntryCache; + + HiveMetadataCache(DrillHiveMetaStoreClient client, HiveConf hiveConf) { + final CacheBuilder cacheBuilder = isExpireAfterWrite(hiveConf) + ?
CacheBuilder.newBuilder().expireAfterWrite(getHmsCacheTTL(hiveConf), TimeUnit.SECONDS) + : CacheBuilder.newBuilder().expireAfterAccess(getHmsCacheTTL(hiveConf), TimeUnit.SECONDS); + + dbNamesCache = cacheBuilder.build(new DatabaseNameCacheLoader(client)); + tableNamesCache = cacheBuilder.build(new TableNameCacheLoader(client)); + tableEntryCache = cacheBuilder.build(new TableEntryCacheLoader(client)); + } + + /** + * Lists database names defined in Hive. + * + * @return names of databases defined in Hive + * @throws TException when loading failed + */ + List<String> getDbNames() throws TException { + try { + return dbNamesCache.get("databases"); + } catch (ExecutionException e) { + throw new TException(e); + } + } + + /** + * Gets a collection of names and types of table objects + * defined in the requested database. + * + * @param dbName database name + * @return collection of names and types of table objects + * @throws TException when loading failed + */ + Map<String, TableType> getTableNamesAndTypes(String dbName) throws TException { + try { + return tableNamesCache.get(dbName); + } catch (ExecutionException e) { + throw new TException(e); + } + } + + /** + * Gets entry metadata for a concrete table or view. + * + * @param dbName database name + * @param tableName table name + * @return {@link HiveReadEntry} containing the table and partition metadata + * @throws TException when loading failed + */ + HiveReadEntry getHiveReadEntry(String dbName, String tableName) throws TException { + try { + return tableEntryCache.get(TableName.of(dbName, tableName)); + } catch (final ExecutionException e) { + throw new TException(e); + } + } + + /** + * By default the cache expiration policy is expire-after-write, + * but the user can change it to expire-after-access by setting the + * {@link HiveMetadataCache#HIVE_METASTORE_CACHE_EXPIRE} option + * value to 'access' for Drill's Hive storage plugin. + * + * @param hiveConf hive conf for the storage plugin + * @return flag defining expiration policy + */ + private boolean isExpireAfterWrite(HiveConf hiveConf) { + boolean expireAfterWrite = true; // default is expire after write. + final String expiry = hiveConf.get(HIVE_METASTORE_CACHE_EXPIRE); + if (HIVE_METASTORE_CACHE_EXPIRE_AFTER_ACCESS.equalsIgnoreCase(expiry)) { + expireAfterWrite = false; + logger.warn("Hive metastore cache expire policy is set to {}", "expireAfterAccess"); + } + return expireAfterWrite; + } + + /** + * By default the cache entry TTL is set to 60 seconds, + * but the user can change it using the {@link HiveMetadataCache#HIVE_METASTORE_CACHE_TTL} + * property of Drill's Hive storage plugin. + * + * @param hiveConf hive conf for the storage plugin + * @return cache entry TTL in seconds + */ + private int getHmsCacheTTL(HiveConf hiveConf) { + int hmsCacheTTL = 60; // default is 60 seconds + final String ttl = hiveConf.get(HIVE_METASTORE_CACHE_TTL); + if (!Strings.isNullOrEmpty(ttl)) { + hmsCacheTTL = Integer.valueOf(ttl); + logger.warn("Hive metastore cache ttl is set to {} seconds.", hmsCacheTTL); + } + return hmsCacheTTL; + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableEntryCacheLoader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableEntryCacheLoader.java new file mode 100644 index 00000000000..2d32faf9801 --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableEntryCacheLoader.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.client; + +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.drill.common.AutoCloseables; +import org.apache.drill.exec.store.hive.ColumnListsCache; +import org.apache.drill.exec.store.hive.HiveReadEntry; +import org.apache.drill.exec.store.hive.HiveTableWithColumnCache; +import org.apache.drill.exec.store.hive.HiveTableWrapper; +import org.apache.drill.exec.store.hive.HiveUtilities; +import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * CacheLoader that synchronizes on the client and tries to reconnect when + * the client fails. Used by {@link HiveMetadataCache}. + */ +final class TableEntryCacheLoader extends CacheLoader<TableName, HiveReadEntry> { + + private static final Logger logger = LoggerFactory.getLogger(TableEntryCacheLoader.class); + + private final DrillHiveMetaStoreClient client; + + TableEntryCacheLoader(DrillHiveMetaStoreClient client) { + this.client = client; + } + + @Override + @SuppressWarnings("NullableProblems") + public HiveReadEntry load(TableName key) throws Exception { + Table table; + List<Partition> partitions; + synchronized (client) { + table = getTable(key); + partitions = getPartitions(key); + } + HiveTableWithColumnCache hiveTable = new HiveTableWithColumnCache(table, new ColumnListsCache(table)); + List<HiveTableWrapper.HivePartitionWrapper> partitionWrappers = getPartitionWrappers(partitions, hiveTable); + return new HiveReadEntry(new HiveTableWrapper(hiveTable), partitionWrappers); + } + + private List<HiveTableWrapper.HivePartitionWrapper> getPartitionWrappers(List<Partition> partitions, HiveTableWithColumnCache hiveTable) { + if (partitions.isEmpty()) { + return null; + } + return partitions.stream() + .map(partition -> HiveUtilities.createPartitionWithSpecColumns(hiveTable, partition)) + .collect(Collectors.toList()); + } + + private List<Partition> getPartitions(TableName key) throws TException { + List<Partition> partitions; + try { + partitions = client.listPartitions(key.getDbName(), key.getTableName(), (short) -1); + } catch (NoSuchObjectException | MetaException e) { + throw e; + } catch (TException e) { + logger.warn("Failure while attempting to get hive partitions. Retries once. 
", e); + AutoCloseables.closeSilently(client::close); + client.reconnect(); + partitions = client.listPartitions(key.getDbName(), key.getTableName(), (short) -1); + } + return partitions; + } + + private Table getTable(TableName key) throws TException { + Table table; + try { + table = client.getTable(key.getDbName(), key.getTableName()); + } catch (MetaException | NoSuchObjectException e) { + throw e; + } catch (TException e) { + logger.warn("Failure while attempting to get hive table. Retries once. ", e); + AutoCloseables.closeSilently(client::close); + client.reconnect(); + table = client.getTable(key.getDbName(), key.getTableName()); + } + + if (table == null) { + throw new UnknownTableException(String.format("Unable to find table '%s'.", key.getTableName())); + } + return table; + } + +} diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java new file mode 100644 index 00000000000..4a6e22fcf9b --- /dev/null +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.client; + +import java.util.Objects; + +/** + * Combination of database and table names used + * to represent key for getting table data from cache. 
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java
new file mode 100644
index 00000000000..4a6e22fcf9b
--- /dev/null
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableName.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.hive.client;
+
+import java.util.Objects;
+
+/**
+ * Combination of database and table names used as
+ * the key for getting table data from the cache.
+ */
+final class TableName {
+
+  private final String dbName;
+  private final String tableName;
+
+  private TableName(String dbName, String tableName) {
+    this.dbName = dbName;
+    this.tableName = tableName;
+  }
+
+  public static TableName of(String dbName, String tableName) {
+    return new TableName(dbName, tableName);
+  }
+
+  public String getDbName() {
+    return dbName;
+  }
+
+  public String getTableName() {
+    return tableName;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof TableName)) {
+      return false;
+    }
+
+    TableName other = (TableName) o;
+    return Objects.equals(dbName, other.dbName)
+        && Objects.equals(tableName, other.tableName);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(dbName, tableName);
+  }
+
+  @Override
+  public String toString() {
+    return String.format("dbName:%s, tableName:%s", dbName, tableName);
+  }
+
+}
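The value semantics of TableName are what make it usable as a cache key: Guava looks entries up via equals()/hashCode(), so two instances built from the same strings must address the same entry. A self-contained illustration follows; the Key class is an invented stand-in for TableName and plain Guava is used instead of Drill's shaded copy:

import java.util.Objects;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class CacheKeySketch {

  /** Minimal stand-in for the TableName key above: immutable, value-based equality. */
  static final class Key {
    final String dbName;
    final String tableName;

    Key(String dbName, String tableName) {
      this.dbName = dbName;
      this.tableName = tableName;
    }

    @Override
    public boolean equals(Object o) {
      return o instanceof Key
          && Objects.equals(dbName, ((Key) o).dbName)
          && Objects.equals(tableName, ((Key) o).tableName);
    }

    @Override
    public int hashCode() {
      return Objects.hash(dbName, tableName);
    }
  }

  public static void main(String[] args) {
    LoadingCache<Key, String> cache = CacheBuilder.newBuilder()
        .build(new CacheLoader<Key, String>() {
          @Override
          public String load(Key key) {
            System.out.println("loading " + key.dbName + "." + key.tableName); // printed once per distinct key
            return key.dbName + "." + key.tableName;
          }
        });

    cache.getUnchecked(new Key("db_general", "student"));
    cache.getUnchecked(new Key("db_general", "student")); // equal key: cache hit, loader not called again
  }
}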
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableNameCacheLoader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableNameCacheLoader.java
new file mode 100644
index 00000000000..19de2aab123
--- /dev/null
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/client/TableNameCacheLoader.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.hive.client;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.calcite.schema.Schema.TableType;
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW;
+
+/**
+ * CacheLoader that synchronizes on the client and tries to reconnect when
+ * the client fails. Used by {@link HiveMetadataCache}.
+ */
+final class TableNameCacheLoader extends CacheLoader<String, Map<String, TableType>> {
+
+  private static final Logger logger = LoggerFactory.getLogger(TableNameCacheLoader.class);
+
+  private final DrillHiveMetaStoreClient client;
+
+  TableNameCacheLoader(DrillHiveMetaStoreClient client) {
+    this.client = client;
+  }
+
+  @Override
+  @SuppressWarnings("NullableProblems")
+  public Map<String, TableType> load(String dbName) throws Exception {
+    List<String> tableAndViewNames;
+    final Set<String> viewNames = new HashSet<>();
+    synchronized (client) {
+      try {
+        tableAndViewNames = client.getAllTables(dbName);
+        viewNames.addAll(client.getTables(dbName, "*", VIRTUAL_VIEW));
+      } catch (MetaException e) {
+        /*
+          HiveMetaStoreClient wraps both MetaException and TException inside MetaException.
+          Since we don't have a good way to differentiate, we close the old connection and retry once.
+          This is only applicable to the getAllTables and getAllDatabases methods, since other methods
+          properly throw the correct exceptions.
+        */
+        logger.warn("Failure while attempting to get hive tables. Retrying once.", e);
+        AutoCloseables.closeSilently(client::close);
+        client.reconnect();
+        tableAndViewNames = client.getAllTables(dbName);
+        viewNames.addAll(client.getTables(dbName, "*", VIRTUAL_VIEW));
+      }
+    }
+    Map<String, TableType> result = tableAndViewNames.stream()
+        .collect(Collectors.toMap(Function.identity(), getValueMapper(viewNames)));
+    return Collections.unmodifiableMap(result);
+  }
+
+  /**
+   * Creates a function used to map a table or view name to the appropriate
+   * {@link TableType} value based on the set of view names.
+   *
+   * @param viewNames set of view names
+   * @return mapping function
+   */
+  private Function<String, TableType> getValueMapper(Set<String> viewNames) {
+    if (viewNames.isEmpty()) {
+      return tableName -> TableType.TABLE;
+    } else {
+      return tableName -> viewNames.contains(tableName) ? TableType.VIEW : TableType.TABLE;
+    }
+  }
+
+}
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveDatabaseSchema.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveDatabaseSchema.java
index 7344ef2888e..44213791181 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveDatabaseSchema.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveDatabaseSchema.java
@@ -17,34 +17,29 @@
  */
 package org.apache.drill.exec.store.hive.schema;
 
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
-import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
-import org.apache.calcite.config.CalciteConnectionConfig;
-import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeFactory;
-import org.apache.calcite.schema.Schema;
-import org.apache.calcite.schema.Statistic;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.calcite.schema.Table;
-import org.apache.calcite.sql.SqlCall;
-import org.apache.calcite.sql.SqlNode;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.exec.store.AbstractSchema;
 import org.apache.drill.exec.store.SchemaConfig;
-import org.apache.drill.exec.store.hive.DrillHiveMetaStoreClient;
+import org.apache.drill.exec.store.hive.client.DrillHiveMetaStoreClient;
 import org.apache.drill.exec.store.hive.HiveStoragePluginConfig;
 import org.apache.drill.exec.store.hive.schema.HiveSchemaFactory.HiveSchema;
 import org.apache.thrift.TException;
 
-import java.util.List;
-import java.util.Set;
-
 public class HiveDatabaseSchema extends AbstractSchema {
   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveDatabaseSchema.class);
 
   private final HiveSchema hiveSchema;
-  private Set<String> tables;
+
+  private Map<String, TableType> tables;
+
   private final DrillHiveMetaStoreClient mClient;
+
   private final SchemaConfig schemaConfig;
 
   public HiveDatabaseSchema(
@@ -63,17 +58,28 @@ public Table getTable(String tableName) {
     return hiveSchema.getDrillTable(this.name, tableName);
   }
 
+  @Override
+  public Collection<Map.Entry<String, TableType>> getTableNamesAndTypes() {
+    ensureInitTables();
+    return tables.entrySet();
+  }
+
   @Override
   public Set<String> getTableNames() {
+    ensureInitTables();
+    return tables.keySet();
+  }
+
+  private void ensureInitTables() {
     if (tables == null) {
       try {
-        tables = Sets.newHashSet(mClient.getTableNames(this.name, schemaConfig.getIgnoreAuthErrors()));
-      } catch (final TException e) {
-        logger.warn("Failure while attempting to access HiveDatabase '{}'.", this.name, e.getCause());
-        tables = Sets.newHashSet(); // empty set.
+        tables = mClient.getTableNamesAndTypes(this.name, schemaConfig.getIgnoreAuthErrors());
+      } catch (TException e) {
+        logger.warn(String.format(
+            "Exception was thrown while getting table names and types for db '%s'.", this.name), e);
+        tables = Collections.emptyMap();
       }
     }
-    return tables;
   }
 
   @Override
@@ -81,71 +87,9 @@ public String getTypeName() {
     return HiveStoragePluginConfig.NAME;
   }
 
-  @Override
-  public List<Pair<String, Table>> getTablesByNamesByBulkLoad(final List<String> tableNames,
-                                                              final int bulkSize) {
-    final String schemaName = getName();
-    final List<org.apache.hadoop.hive.metastore.api.Table> tables = DrillHiveMetaStoreClient
-        .getTablesByNamesByBulkLoadHelper(mClient, tableNames, schemaName, bulkSize);
-
-    final List<Pair<String, Table>> tableNameToTable = Lists.newArrayList();
-    for (final org.apache.hadoop.hive.metastore.api.Table table : tables) {
-      if (table == null) {
-        continue;
-      }
-
-      final String tableName = table.getTableName();
-      final TableType tableType;
-      if (table.getTableType().equals(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW.toString())) {
-        tableType = TableType.VIEW;
-      } else {
-        tableType = TableType.TABLE;
-      }
-      tableNameToTable.add(Pair.of(tableName, new HiveTableWithoutStatisticAndRowType(tableType)));
-    }
-    return tableNameToTable;
-  }
-
   @Override
   public boolean areTableNamesCaseSensitive() {
     return false;
   }
 
-  private static class HiveTableWithoutStatisticAndRowType implements Table {
-
-    private final TableType tableType;
-
-    HiveTableWithoutStatisticAndRowType(final TableType tableType) {
-      this.tableType = tableType;
-    }
-
-    @Override
-    public RelDataType getRowType(RelDataTypeFactory typeFactory) {
-      throw new UnsupportedOperationException(
-          "RowType was not retrieved when this table had been being requested");
-    }
-
-    @Override
-    public Statistic getStatistic() {
-      throw new UnsupportedOperationException(
-          "Statistic was not retrieved when this table had been being requested");
-    }
-
-    @Override
-    public Schema.TableType getJdbcTableType() {
-      return tableType;
-    }
-
-    @Override
-    public boolean rolledUpColumnValidInsideAgg(String column,
-        SqlCall call, SqlNode parent, CalciteConnectionConfig config) {
-      return true;
-    }
-
-    @Override
-    public boolean isRolledUp(String column) {
-      return false;
-    }
-  }
-
 }
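The ensureInitTables() pattern above trades a single bulk metastore call for both the name and the type lookups. Reduced to its essentials it looks like the hypothetical standalone class below, where the hard-coded entries stand in for the one getTableNamesAndTypes() round-trip:

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

class LazyTableIndexSketch {

  private Map<String, String> tables; // table name -> type, filled on first use

  private void ensureInit() {
    if (tables == null) {
      Map<String, String> fetched = new LinkedHashMap<>();
      fetched.put("student", "TABLE");   // stands in for one bulk metastore call
      fetched.put("vw_student", "VIEW");
      tables = Collections.unmodifiableMap(fetched);
    }
  }

  /** SHOW TABLES path: names only. */
  Set<String> tableNames() {
    ensureInit();
    return tables.keySet();
  }

  /** INFORMATION_SCHEMA path: names plus types, served from the same single fetch. */
  Map<String, String> namesAndTypes() {
    ensureInit();
    return tables;
  }
}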
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
index 402dda26b21..f63c503e4b7 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
@@ -34,10 +34,11 @@
 import org.apache.drill.exec.store.AbstractSchema;
 import org.apache.drill.exec.store.AbstractSchemaFactory;
 import org.apache.drill.exec.store.SchemaConfig;
-import org.apache.drill.exec.store.hive.DrillHiveMetaStoreClient;
 import org.apache.drill.exec.store.hive.HiveReadEntry;
 import org.apache.drill.exec.store.hive.HiveStoragePlugin;
 import org.apache.drill.exec.store.hive.HiveStoragePluginConfig;
+import org.apache.drill.exec.store.hive.client.DrillHiveMetaStoreClient;
+import org.apache.drill.exec.store.hive.client.DrillHiveMetaStoreClientFactory;
 import org.apache.drill.shaded.guava.com.google.common.cache.CacheBuilder;
 import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader;
 import org.apache.drill.shaded.guava.com.google.common.cache.LoadingCache;
@@ -73,7 +74,7 @@ public HiveSchemaFactory(final HiveStoragePlugin plugin, final String name, fina
     try {
       // TODO: DRILL-6412. Clients for plugin should be instantiated only for the case, when plugin is enabled
       processUserMetastoreClient =
-          DrillHiveMetaStoreClient.createCloseableClientWithCaching(hiveConf);
+          DrillHiveMetaStoreClientFactory.createCloseableClientWithCaching(hiveConf);
     } catch (MetaException e) {
       throw new ExecutionSetupException("Failure setting up Hive metastore client.", e);
     }
@@ -88,8 +89,8 @@ public HiveSchemaFactory(final HiveStoragePlugin plugin, final String name, fina
         })
         .build(new CacheLoader<String, DrillHiveMetaStoreClient>() {
           @Override
-          public DrillHiveMetaStoreClient load(String userName) throws Exception {
-            return DrillHiveMetaStoreClient.createClientWithAuthz(processUserMetastoreClient, hiveConf, userName);
+          public DrillHiveMetaStoreClient load(String userName) {
+            return DrillHiveMetaStoreClientFactory.createClientWithAuthz(processUserMetastoreClient, hiveConf, userName);
           }
         });
   }
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
index 684519ad350..e721ab14522 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.calcite.schema.Schema.TableType;
@@ -38,7 +39,6 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import static java.util.Collections.emptyList;
 import static org.apache.drill.exec.hive.HiveTestUtilities.executeQuery;
 import static org.apache.drill.shaded.guava.com.google.common.collect.Lists.newArrayList;
 import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
@@ -70,6 +70,29 @@ public class TestStorageBasedHiveAuthorization extends BaseTestHiveImpersonation
   private static final String g_voter_all_755 = "voter_all_755";
   private static final String g_partitioned_student_u0_700 = "partitioned_student_u0_700";
 
+  private static final List<String> all_tables_in_db_general = ImmutableList.of(
+      g_student_u0_700,
+      g_vw_g_student_u0_700,
+      g_student_u0g0_750,
+      g_student_all_755,
+      g_voter_u1_700,
+      g_voter_u2g1_750,
+      g_voter_all_755,
+      g_partitioned_student_u0_700
+  );
+
+  private static final List<TableType> all_tables_type_in_db_general = ImmutableList.of(
+      TableType.TABLE,
+      TableType.VIEW,
+      TableType.TABLE,
+      TableType.TABLE,
+      TableType.TABLE,
+      TableType.TABLE,
+      TableType.TABLE,
+      TableType.TABLE
+  );
+
+
   // DB whose warehouse directory has permissions 700 and owned by user0
   private static final String db_u0_only = "db_u0_only";
 
@@ -78,6 +101,18 @@
   private static final String u0_student_all_755 = "student_all_755";
   private static final String u0_voter_all_755 = "voter_all_755";
   private static final String u0_vw_voter_all_755 = "vw_voter_all_755";
 
+  private static final List<String> all_tables_in_db_u0_only = ImmutableList.of(
+      u0_student_all_755,
+      u0_voter_all_755,
+      u0_vw_voter_all_755
+  );
+
+  private static final List<TableType> all_tables_type_in_db_u0_only = ImmutableList.of(
+      TableType.TABLE,
+      TableType.TABLE,
+      TableType.VIEW
+  );
+
   // DB whose warehouse directory has permissions 750 and owned by user1 and group1
   private static final String db_u1g1_only = "db_u1g1_only";
"db_u1g1_only"; @@ -87,6 +122,21 @@ public class TestStorageBasedHiveAuthorization extends BaseTestHiveImpersonation private static final String u1g1_voter_all_755 = "voter_all_755"; private static final String u1g1_voter_u1_700 = "voter_u1_700"; + private static final List all_tables_in_db_u1g1_only = ImmutableList.of( + u1g1_student_all_755, + u1g1_student_u1_700, + u1g1_voter_all_755, + u1g1_voter_u1_700 + ); + + private static final List all_tables_type_db_u1g1_only = ImmutableList.of( + TableType.TABLE, + TableType.TABLE, + TableType.TABLE, + TableType.TABLE + ); + + // Create a view on "student_u0_700". View is owned by user0:group0 and has permissions 750 private static final String v_student_u0g0_750 = "v_student_u0g0_750"; @@ -330,24 +380,13 @@ public void showSchemas() throws Exception { @Test public void user0_db_general_showTables() throws Exception { updateClient(org1Users[0]); - showTablesHelper(db_general, ImmutableList.of( - g_student_u0_700, - g_student_u0g0_750, - g_student_all_755, - g_voter_all_755, - g_partitioned_student_u0_700, - g_vw_g_student_u0_700 - )); + showTablesHelper(db_general, all_tables_in_db_general); } @Test public void user0_db_u0_only_showTables() throws Exception { updateClient(org1Users[0]); - showTablesHelper(db_u0_only, ImmutableList.of( - u0_student_all_755, - u0_voter_all_755, - u0_vw_voter_all_755 - )); + showTablesHelper(db_u0_only, all_tables_in_db_u0_only); } /** @@ -357,51 +396,29 @@ public void user0_db_u0_only_showTables() throws Exception { @Test public void user0_db_u1g1_only_showTables() throws Exception { updateClient(org1Users[0]); - showTablesHelper(db_u1g1_only, emptyList()); + showTablesHelper(db_u1g1_only, all_tables_in_db_u1g1_only); } @Test public void user0_db_general_infoSchema() throws Exception { updateClient(org1Users[0]); fromInfoSchemaHelper(db_general, - ImmutableList.of( - g_student_u0_700, - g_student_u0g0_750, - g_student_all_755, - g_voter_all_755, - g_partitioned_student_u0_700, - g_vw_g_student_u0_700 - ), - ImmutableList.of( - TableType.TABLE, - TableType.TABLE, - TableType.TABLE, - TableType.TABLE, - TableType.TABLE, - TableType.VIEW - )); + all_tables_in_db_general, + all_tables_type_in_db_general); } @Test public void user0_db_u0_only_infoSchema() throws Exception { updateClient(org1Users[0]); fromInfoSchemaHelper(db_u0_only, - ImmutableList.of( - u0_student_all_755, - u0_voter_all_755, - u0_vw_voter_all_755 - ), - ImmutableList.of( - TableType.TABLE, - TableType.TABLE, - TableType.VIEW - )); + all_tables_in_db_u0_only, + all_tables_type_in_db_u0_only); } @Test public void user0_db_u1g1_only_infoSchema() throws Exception { updateClient(org1Users[0]); - fromInfoSchemaHelper(db_u1g1_only, emptyList(), emptyList()); + fromInfoSchemaHelper(db_u1g1_only, all_tables_in_db_u1g1_only, all_tables_type_db_u1g1_only); } /** @@ -565,53 +582,27 @@ public void user0_forbidden_v_partitioned_student_u1g1_750() throws Exception { @Test public void user1_db_general_showTables() throws Exception { updateClient(org1Users[1]); - showTablesHelper(db_general, ImmutableList.of( - g_student_u0g0_750, - g_student_all_755, - g_voter_u1_700, - g_voter_u2g1_750, - g_voter_all_755, - g_vw_g_student_u0_700 - )); + showTablesHelper(db_general, all_tables_in_db_general); } @Test public void user1_db_u1g1_only_showTables() throws Exception { updateClient(org1Users[1]); - showTablesHelper(db_u1g1_only, ImmutableList.of( - u1g1_student_all_755, - u1g1_student_u1_700, - u1g1_voter_all_755, - u1g1_voter_u1_700 - )); + 
   }
 
   @Test
   public void user1_db_u0_only_showTables() throws Exception {
     updateClient(org1Users[1]);
-    showTablesHelper(db_u0_only, newArrayList(u0_vw_voter_all_755));
+    showTablesHelper(db_u0_only, all_tables_in_db_u0_only);
   }
 
   @Test
   public void user1_db_general_infoSchema() throws Exception {
     updateClient(org1Users[1]);
     fromInfoSchemaHelper(db_general,
-        ImmutableList.of(
-            g_student_u0g0_750,
-            g_student_all_755,
-            g_voter_u1_700,
-            g_voter_u2g1_750,
-            g_voter_all_755,
-            g_vw_g_student_u0_700
-        ),
-        ImmutableList.of(
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.VIEW
-        ));
+        all_tables_in_db_general,
+        all_tables_type_in_db_general);
   }
 
   @Test
@@ -636,7 +627,8 @@ public void user1_db_u1g1_only_infoSchema() throws Exception {
   public void user1_db_u0_only_infoSchema() throws Exception {
     updateClient(org1Users[1]);
     fromInfoSchemaHelper(db_u0_only,
-        newArrayList(u0_vw_voter_all_755), newArrayList(TableType.VIEW));
+        newArrayList(u0_vw_voter_all_755, u0_student_all_755, u0_voter_all_755),
+        newArrayList(TableType.VIEW, TableType.TABLE, TableType.TABLE));
   }
 
   /**
@@ -738,66 +730,43 @@ public void user1_allowed_v_partitioned_student_u1g1_750() throws Exception {
 
   @Test
   public void user2_db_general_showTables() throws Exception {
     updateClient(org1Users[2]);
-    showTablesHelper(db_general, ImmutableList.of(
-        g_student_all_755,
-        g_voter_u2g1_750,
-        g_voter_all_755,
-        g_vw_g_student_u0_700
-    ));
+    showTablesHelper(db_general, all_tables_in_db_general);
   }
 
   @Test
   public void user2_db_u1g1_only_showTables() throws Exception {
     updateClient(org1Users[2]);
-    showTablesHelper(db_u1g1_only, ImmutableList.of(
-        u1g1_student_all_755,
-        u1g1_voter_all_755
-    ));
+    showTablesHelper(db_u1g1_only, all_tables_in_db_u1g1_only);
   }
 
   @Test
   public void user2_db_u0_only_showTables() throws Exception {
     updateClient(org1Users[2]);
-    showTablesHelper(db_u0_only, newArrayList(u0_vw_voter_all_755));
+    showTablesHelper(db_u0_only, all_tables_in_db_u0_only);
   }
 
   @Test
   public void user2_db_general_infoSchema() throws Exception {
     updateClient(org1Users[2]);
     fromInfoSchemaHelper(db_general,
-        ImmutableList.of(
-            g_student_all_755,
-            g_voter_u2g1_750,
-            g_voter_all_755,
-            g_vw_g_student_u0_700
-        ),
-        ImmutableList.of(
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.TABLE,
-            TableType.VIEW
-        ));
+        all_tables_in_db_general,
+        all_tables_type_in_db_general);
   }
 
   @Test
   public void user2_db_u1g1_only_infoSchema() throws Exception {
     updateClient(org1Users[2]);
     fromInfoSchemaHelper(db_u1g1_only,
-        ImmutableList.of(
-            u1g1_student_all_755,
-            u1g1_voter_all_755
-        ),
-        ImmutableList.of(
-            TableType.TABLE,
-            TableType.TABLE
-        ));
+        all_tables_in_db_u1g1_only,
+        all_tables_type_db_u1g1_only);
   }
 
   @Test
   public void user2_db_u0_only_infoSchema() throws Exception {
     updateClient(org1Users[2]);
-    fromInfoSchemaHelper(db_u0_only, newArrayList(u0_vw_voter_all_755),
-        newArrayList(TableType.VIEW));
+    fromInfoSchemaHelper(db_u0_only,
+        newArrayList(all_tables_in_db_u0_only),
+        newArrayList(all_tables_type_in_db_u0_only));
   }
 
   /**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 3af874434d0..a2cdb780c6c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -744,16 +744,38 @@ private ExecConstants() {
   public static final BooleanValidator CTAS_PARTITIONING_HASH_DISTRIBUTE_VALIDATOR = new BooleanValidator(CTAS_PARTITIONING_HASH_DISTRIBUTE,
       new OptionDescription("Uses a hash algorithm to distribute data on partition keys in a CTAS partitioning operation. An alpha option--for experimental use at this stage. Do not use in production systems."));
 
+
+  /**
+   * @deprecated The option no longer takes any effect. It was added as part of DRILL-4577
+   * to indicate that Hive tables should be loaded for all table names at once. DRILL-4826
+   * later added an option to regulate the bulk size, because a large number of views was
+   * causing performance degradation. After the DRILL-7115 improvements, both options
+   * ({@link ExecConstants#ENABLE_BULK_LOAD_TABLE_LIST_KEY}
+   * and {@link ExecConstants#BULK_LOAD_TABLE_LIST_BULK_SIZE_KEY}) became obsolete and may
+   * be removed in future releases.
+   */
+  @Deprecated
   public static final String ENABLE_BULK_LOAD_TABLE_LIST_KEY = "exec.enable_bulk_load_table_list";
-  public static final BooleanValidator ENABLE_BULK_LOAD_TABLE_LIST = new BooleanValidator(ENABLE_BULK_LOAD_TABLE_LIST_KEY, null);
 
   /**
-   * When getting Hive Table information with exec.enable_bulk_load_table_list set to true,
-   * use the exec.bulk_load_table_list.bulk_size to determine how many tables to fetch from HiveMetaStore
-   * at a time. (The number of tables can get to be quite large.)
+   * @see ExecConstants#ENABLE_BULK_LOAD_TABLE_LIST_KEY
    */
+  @Deprecated
+  public static final BooleanValidator ENABLE_BULK_LOAD_TABLE_LIST = new BooleanValidator(ENABLE_BULK_LOAD_TABLE_LIST_KEY,
+      new OptionDescription("Deprecated after DRILL-7115 improvement."));
+
+  /**
+   * @see ExecConstants#ENABLE_BULK_LOAD_TABLE_LIST_KEY
+   */
+  @Deprecated
   public static final String BULK_LOAD_TABLE_LIST_BULK_SIZE_KEY = "exec.bulk_load_table_list.bulk_size";
-  public static final PositiveLongValidator BULK_LOAD_TABLE_LIST_BULK_SIZE = new PositiveLongValidator(BULK_LOAD_TABLE_LIST_BULK_SIZE_KEY, Integer.MAX_VALUE, null);
+
+  /**
+   * @see ExecConstants#ENABLE_BULK_LOAD_TABLE_LIST_KEY
+   */
+  @Deprecated
+  public static final PositiveLongValidator BULK_LOAD_TABLE_LIST_BULK_SIZE = new PositiveLongValidator(BULK_LOAD_TABLE_LIST_BULK_SIZE_KEY, Integer.MAX_VALUE,
+      new OptionDescription("Deprecated after DRILL-7115 improvement."));
 
   /**
    * Option whose value is a comma separated list of admin usernames. Admin users are users who have special privileges
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillParserUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillParserUtil.java
index 5cd4cd42493..ee1106d557b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillParserUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillParserUtil.java
@@ -17,31 +17,31 @@
  */
 package org.apache.drill.exec.planner.sql.parser;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.parser.SqlParserUtil;
-import org.apache.calcite.util.Util;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
 
 /**
  * Helper methods or constants used in parsing a SQL query.
  */
 public class DrillParserUtil {
 
-  public static final String CHARSET = Util.getDefaultCharset().name();
+  private static final int CONDITION_LIST_CAPACITY = 3;
 
   public static SqlNode createCondition(SqlNode left, SqlOperator op, SqlNode right) {
 
     // if one of the operands is null, return the other
-    if (left == null || right == null) {
-      return left != null ? left : right;
+    if (left == null) {
+      return right;
+    } else if (right == null) {
+      return left;
     }
 
-    List<Object> listCondition = Lists.newArrayList();
+    List<Object> listCondition = new ArrayList<>(CONDITION_LIST_CAPACITY);
     listCondition.add(left);
     listCondition.add(new SqlParserUtil.ToTreeListItem(op, SqlParserPos.ZERO));
     listCondition.add(right);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java
index c510525c5fb..aabcbb67530 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractSchema.java
@@ -23,9 +23,10 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Set;
+import java.util.stream.Collectors;
 
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
 import org.apache.calcite.linq4j.tree.DefaultExpression;
 import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.rel.type.RelProtoDataType;
@@ -38,9 +39,8 @@
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.exec.dotdrill.View;
 import org.apache.drill.exec.planner.logical.CreateTableEntry;
-
 import org.apache.drill.shaded.guava.com.google.common.base.Joiner;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
 
 public abstract class AbstractSchema implements Schema, SchemaPartitionExplorer, AutoCloseable {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractSchema.class);
@@ -276,54 +276,29 @@ public void dropTable(String tableName) {
   }
 
   /**
-   * Get the collection of {@link Table} tables specified in the tableNames with bulk-load (if the underlying storage
-   * plugin supports).
-   * It is not guaranteed that the retrieved tables would have RowType and Statistic being fully populated.
-   *
-   * Specifically, calling {@link Table#getRowType(org.apache.calcite.rel.type.RelDataTypeFactory)} or {@link Table#getStatistic()} might incur
-   * {@link UnsupportedOperationException} being thrown.
+   * Get the collection of {@link Table} tables specified in the tableNames.
    *
    * @param tableNames the requested tables, specified by the table names
    * @return the collection of requested tables
    */
-  public List<Pair<String, Table>> getTablesByNamesByBulkLoad(final List<String> tableNames, int bulkSize) {
-    return getTablesByNames(tableNames);
+  public List<Pair<String, Table>> getTablesByNames(final Set<String> tableNames) {
+    return tableNames.stream()
+        .map(tableName -> Pair.of(tableName, getTable(tableName)))
+        .filter(pair -> Objects.nonNull(pair.getValue())) // Schema may return NULL for a table if the user doesn't have permissions to load it.
+        .collect(Collectors.toList());
   }
 
   /**
-   * Get the collection of {@link Table} tables specified in the tableNames.
+   * Used by {@link org.apache.drill.exec.store.ischema.InfoSchemaRecordGenerator.Tables}
+   * for getting all table objects along with their types for every requested schema. This method
+   * should work fast because it directly impacts the performance of SHOW TABLES queries.
    *
-   * @param tableNames the requested tables, specified by the table names
-   * @return the collection of requested tables
+   * @return collection of table names and types
    */
-  public List<Pair<String, Table>> getTablesByNames(final List<String> tableNames) {
-    final List<Pair<String, Table>> tables = Lists.newArrayList();
-    for (String tableName : tableNames) {
-      final Table table = getTable(tableName);
-      if (table == null) {
-        // Schema may return NULL for table if the query user doesn't have permissions to load the table. Ignore such
-        // tables as INFO SCHEMA is about showing tables which the use has access to query.
-        continue;
-      }
-      tables.add(Pair.of(tableName, table));
-    }
-    return tables;
-  }
-
-  public List<Pair<String, TableType>> getTableNamesAndTypes(boolean bulkLoad, int bulkSize) {
-    final List<String> tableNames = Lists.newArrayList(getTableNames());
-    final List<Pair<String, TableType>> tableNamesAndTypes = Lists.newArrayList();
-    final List<Pair<String, Table>> tables;
-    if (bulkLoad) {
-      tables = getTablesByNamesByBulkLoad(tableNames, bulkSize);
-    } else {
-      tables = getTablesByNames(tableNames);
-    }
-    for (Pair<String, Table> table : tables) {
-      tableNamesAndTypes.add(Pair.of(table.getKey(), table.getValue().getJdbcTableType()));
-    }
-
-    return tableNamesAndTypes;
+  public Collection<Map.Entry<String, TableType>> getTableNamesAndTypes() {
+    return getTablesByNames(getTableNames()).stream()
+        .map(nameAndTable -> Pair.of(nameAndTable.getKey(), nameAndTable.getValue().getJdbcTableType()))
+        .collect(Collectors.toList());
   }
 
   /**
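The default implementation above is correct but potentially slow: it materializes a Table object per name just to read its JDBC type. The point of this patch is that schemas which already know their table types should override the method, as the Hive and workspace schemas do. A hypothetical override under that assumption (the class, its catalog, and the "vw_" naming convention are all invented for illustration):

import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

class CatalogBackedSchemaSketch {

  /** Stand-in for org.apache.calcite.schema.Schema.TableType. */
  enum TableType { TABLE, VIEW }

  /** Answers SHOW TABLES from an in-memory catalog instead of loading each table. */
  Collection<Map.Entry<String, TableType>> getTableNamesAndTypes() {
    return Stream.of("student", "voter", "vw_student")
        .map(name -> new SimpleImmutableEntry<>(name,
            name.startsWith("vw_") ? TableType.VIEW : TableType.TABLE))
        .collect(Collectors.toList());
  }
}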
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
index 2e302f72841..bf27f51b3d4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
@@ -34,6 +34,7 @@
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
@@ -920,46 +921,11 @@ public void dropTable(String table) {
     }
 
     @Override
-    public List<Pair<String, TableType>> getTableNamesAndTypes(boolean bulkLoad, int bulkSize) {
-      final List<Pair<String, TableType>> tableNamesAndTypes = Lists.newArrayList();
-
-      // Look for raw tables first
-      if (!tables.isEmpty()) {
-        for (Map.Entry<TableInstance, DrillTable> tableEntry : tables.entrySet()) {
-          tableNamesAndTypes
-              .add(Pair.of(tableEntry.getKey().sig.name, tableEntry.getValue().getJdbcTableType()));
-        }
-      }
-      // Then look for files that start with this name and end in .drill.
-      List<DotDrillFile> files = Collections.emptyList();
-      try {
-        files = DotDrillUtil.getDotDrills(getFS(), new Path(config.getLocation()), DotDrillType.VIEW);
-      } catch (AccessControlException e) {
-        if (!schemaConfig.getIgnoreAuthErrors()) {
-          logger.debug(e.getMessage());
-          throw UserException.permissionError(e)
-              .message("Not authorized to list or query tables in schema [%s]", getFullSchemaName())
-              .build(logger);
-        }
-      } catch (IOException e) {
-        logger.warn("Failure while trying to list view tables in workspace [{}]", getFullSchemaName(), e);
-      } catch (UnsupportedOperationException e) {
-        // the file system (e.g. the classpath filesystem) may not support listing
-        // of files. But see getViews(), it ignores the exception and continues
-        logger.debug("Failure while trying to list view tables in workspace [{}]", getFullSchemaName(), e);
-      }
-
-      try {
-        for (DotDrillFile f : files) {
-          if (f.getType() == DotDrillType.VIEW) {
-            tableNamesAndTypes.add(Pair.of(f.getBaseName(), TableType.VIEW));
-          }
-        }
-      } catch (UnsupportedOperationException e) {
-        logger.debug("The filesystem for this workspace does not support this operation.", e);
-      }
-
-      return tableNamesAndTypes;
+    public List<Map.Entry<String, TableType>> getTableNamesAndTypes() {
+      return Stream.concat(
+          tables.entrySet().stream().map(kv -> Pair.of(kv.getKey().sig.name, kv.getValue().getJdbcTableType())),
+          getViews().stream().map(viewName -> Pair.of(viewName, TableType.VIEW))
+      ).collect(Collectors.toList());
     }
 
   }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
index bb49e171e4e..a85a7a74f08 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaRecordGenerator.java
@@ -266,8 +266,7 @@ private void scanSchema(String schemaPath, SchemaPlus schema) {
    */
   public void visitTables(String schemaPath, SchemaPlus schema) {
     final AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class);
-    final List<String> tableNames = Lists.newArrayList(schema.getTableNames());
-    for (Pair<String, Table> tableNameToTable : drillSchema.getTablesByNames(tableNames)) {
+    for (Pair<String, Table> tableNameToTable : drillSchema.getTablesByNames(schema.getTableNames())) {
       final String tableName = tableNameToTable.getKey();
       final Table table = tableNameToTable.getValue();
       final TableType tableType = table.getJdbcTableType();
@@ -338,26 +337,16 @@ public PojoRecordReader<Records.Table> getRecordReader() {
 
     @Override
     public void visitTables(String schemaPath, SchemaPlus schema) {
-      final AbstractSchema drillSchema = schema.unwrap(AbstractSchema.class);
-      final List<Pair<String, TableType>> tableNamesAndTypes = drillSchema
-          .getTableNamesAndTypes(optionManager.getOption(ExecConstants.ENABLE_BULK_LOAD_TABLE_LIST),
-              (int) optionManager.getOption(ExecConstants.BULK_LOAD_TABLE_LIST_BULK_SIZE));
-
-      for (Pair<String, TableType> tableNameAndType : tableNamesAndTypes) {
-        final String tableName = tableNameAndType.getKey();
-        final TableType tableType = tableNameAndType.getValue();
-        // Visit the table, and if requested ...
-        if (shouldVisitTable(schemaPath, tableName, tableType)) {
-          visitTableWithType(schemaPath, tableName, tableType);
-        }
-      }
+      schema.unwrap(AbstractSchema.class).getTableNamesAndTypes()
+          .forEach(nameAndType -> attemptVisitTableWithType(schemaPath, nameAndType.getKey(), nameAndType.getValue()));
     }
 
-    private void visitTableWithType(String schemaName, String tableName, TableType type) {
-      Preconditions
-          .checkNotNull(type, "Error. Type information for table %s.%s provided is null.", schemaName,
-              tableName);
-      records.add(new Records.Table(IS_CATALOG_NAME, schemaName, tableName, type.toString()));
+    private void attemptVisitTableWithType(final String schemaName, final String tableName,
+                                           final TableType type) {
+      // Visit the table if requested ...
+      if (shouldVisitTable(schemaName, tableName, type)) {
+        records.add(new Records.Table(IS_CATALOG_NAME, schemaName, tableName, type.toString()));
+      }
     }
 
     @Override