Skip to content

Commit

Permalink
DRILL-7115: Improve Hive schema show tables performance
Browse files Browse the repository at this point in the history
1. To make SHOW TABLES for Hive schema work much faster, the additional Drill
   feature of showing only accessible tables when Storage-Based authorization
   is enabled was sacrificed. Now the behaviour matches Hive/Beeline: all
   tables will be shown regardless of accessibility. For details about previous
   show tables results, check the description of DRILL-540.
2. In HiveDatabaseSchema implemented faster getTableNamesAndTypes() method
   and removed bulk related code.
3. Deprecated bulk related options and removed bulk code from AbstractSchema,
   DrillHiveMetastoreClient.
4. For 8000 Hive tables query returned in 1.8 seconds, for combination of
   4000 tables and 8000 views query returned in 2.3 seconds. Note, that
   after first query table names will be cached and next queries will perform
   in less than 1 sec.
5. Refactored WorkspaceSchemaFactory's getTableNamesAndTypes()
   method to reuse existing getViews() method.
6. DrillHiveMetaStoreClient was refactored. Classes were unnested and enclosed
   within the client package with restricted visibility. The cache value type
   was also updated to avoid unnecessary back-and-forth List to Set conversions.
   Client creation methods were moved to a separate class, so the new package
   exposes only the factory and the client class.

closes #1706
  • Loading branch information
ihuzenko authored and sohami committed Apr 5, 2019
1 parent 3772757 commit cf51aa7
Show file tree
Hide file tree
Showing 20 changed files with 1,058 additions and 913 deletions.

This file was deleted.

Expand Up @@ -51,7 +51,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;

import static org.apache.drill.exec.store.hive.DrillHiveMetaStoreClient.createPartitionWithSpecColumns;
import static org.apache.drill.exec.store.hive.HiveUtilities.createPartitionWithSpecColumns;

@JsonTypeName("hive-scan")
public class HiveScan extends AbstractGroupScan {
Expand Down
Expand Up @@ -56,6 +56,7 @@
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.exec.Utilities;
Expand Down Expand Up @@ -743,5 +744,20 @@ public static HiveConf generateHiveConf(HiveConf hiveConf, Map<String, String> p
return newHiveConf;
}

/**
 * Wraps a Hive partition together with the index of its column list in the table's column-lists cache.
 * <p>
 * The partition's columns are handed to the table's column-lists cache via {@code addOrGet}: when the cache
 * already holds an identical column list, the index of that existing list is used; otherwise the list is
 * added to the cache and the index of the new entry is used. The partition then stores only that index.
 *
 * @param table hive table instance whose column-lists cache is consulted
 * @param partition partition instance to wrap
 * @return hive partition wrapper referring to the cached column list by index
 */
public static HiveTableWrapper.HivePartitionWrapper createPartitionWithSpecColumns(HiveTableWithColumnCache table, Partition partition) {
  // Look up (or register) the partition's column list and remember where it lives in the cache.
  final int columnListIndex = table.getColumnListsCache().addOrGet(partition.getSd().getCols());
  final HivePartition indexedPartition = new HivePartition(partition, columnListIndex);
  return new HiveTableWrapper.HivePartitionWrapper(indexedPartition);
}

}

@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.hive.client;

import java.util.List;

import org.apache.drill.common.AutoCloseables;
import org.apache.drill.shaded.guava.com.google.common.cache.CacheLoader;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link CacheLoader} that loads the list of all Hive database names while
 * synchronizing on the client, and that tries to reconnect once when the
 * client fails. Used by {@link HiveMetadataCache}.
 */
final class DatabaseNameCacheLoader extends CacheLoader<String, List<String>> {

  private static final Logger logger = LoggerFactory.getLogger(DatabaseNameCacheLoader.class);

  private final DrillHiveMetaStoreClient client;

  /**
   * @param client metastore client used for loading; also serves as the lock object
   */
  DatabaseNameCacheLoader(DrillHiveMetaStoreClient client) {
    this.client = client;
  }

  /**
   * Loads all database names from the metastore. On {@link MetaException} the
   * connection is closed, re-established and the call is retried exactly once.
   *
   * @param key cache key; not used, every key yields the full database list
   * @return names of all Hive databases
   * @throws Exception the original {@link MetaException} when reconnecting fails
   *         (with the reconnect failure attached as a suppressed exception),
   *         or whatever the retried call throws
   */
  @Override
  @SuppressWarnings("NullableProblems")
  public List<String> load(String key) throws Exception {
    synchronized (client) {
      try {
        return client.getAllDatabases();
      } catch (MetaException e) {
        /*
           HiveMetaStoreClient is encapsulating both the MetaException/TExceptions inside MetaException.
           Since we don't have good way to differentiate, we will close older connection and retry once.
           This is only applicable for getAllTables and getAllDatabases method since other methods are
           properly throwing correct exceptions.
        */
        logger.warn("Failure while attempting to get hive databases. Retries once.", e);
        AutoCloseables.closeSilently(client::close);
        try {
          /*
             Attempt to reconnect. If this is a secure connection, this will fail due
             to the invalidation of the security token. In that case, throw the original
             exception and let a higher level clean up. Ideally we'd get a new token
             here, but doing so requires the use of a different connection, and that
             one has also become invalid. This code needs a rework; this is just a
             work-around.
          */
          client.reconnect();
        } catch (Exception e1) {
          // Keep the reconnect failure visible instead of dropping it; the
          // exception propagated to the caller is still the original one.
          if (e1 != e) {
            e.addSuppressed(e1);
          }
          throw e;
        }
        return client.getAllDatabases();
      }
    }
  }

}
@@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.hive.client;

import java.util.List;
import java.util.Map;

import org.apache.calcite.schema.Schema.TableType;
import org.apache.drill.exec.store.hive.HiveReadEntry;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.TException;

/**
 * {@link HiveMetaStoreClient} extension that adds a metadata cache and a few
 * lookup methods convenient for Drill schemas. Access to the parent class is
 * synchronized either at cache loading level or in overridden methods; child
 * classes must keep that synchronization in place.
 */
public class DrillHiveMetaStoreClient extends HiveMetaStoreClient {

  /**
   * Single entry point for reaching the underlying HiveMetaStoreClient
   * through local caches.
   */
  private final HiveMetadataCache hiveMetadataCache;

  /**
   * Package-private on purpose:
   * 1) child classes have to live in the same package;
   * 2) other packages obtain instances only through
   *    {@link DrillHiveMetaStoreClientFactory}.
   *
   * @param hiveConf hive conf from storage plugin
   * @throws MetaException when initialization failed
   */
  DrillHiveMetaStoreClient(final HiveConf hiveConf) throws MetaException {
    super(hiveConf);
    this.hiveMetadataCache = new HiveMetadataCache(this, hiveConf);
  }

  /**
   * Returns the names of all Hive databases, served through the metadata cache.
   *
   * @param ignoreAuthzErrors whether authorization errors should be ignored;
   *                          this implementation does not consult the flag
   * @return list of Hive databases
   * @throws TException when client fails
   */
  public List<String> getDatabases(boolean ignoreAuthzErrors) throws TException {
    final List<String> databaseNames = hiveMetadataCache.getDbNames();
    return databaseNames;
  }

  /**
   * Fetches metadata of a single table through the metadata cache.
   *
   * @param dbName name of database
   * @param tableName name of table
   * @param ignoreAuthzErrors whether authorization errors should be ignored;
   *                          this implementation does not consult the flag
   * @return {@link HiveReadEntry} containing table meta like columns, partitions etc.
   * @throws TException when client fails
   */
  public HiveReadEntry getHiveReadEntry(final String dbName, final String tableName, boolean ignoreAuthzErrors) throws TException {
    final HiveReadEntry readEntry = hiveMetadataCache.getHiveReadEntry(dbName, tableName);
    return readEntry;
  }

  /**
   * Returns the names of views and tables of a database together with their types,
   * served through the metadata cache.
   *
   * @param dbName name of database
   * @param ignoreAuthzErrors hint for handling authorization errors;
   *                          this implementation does not consult the flag
   * @return map where keys are db object names and values are types (VIEW or TABLE)
   * @throws TException when the cache loader failed
   */
  public Map<String, TableType> getTableNamesAndTypes(final String dbName, boolean ignoreAuthzErrors) throws TException {
    final Map<String, TableType> namesToTypes = hiveMetadataCache.getTableNamesAndTypes(dbName);
    return namesToTypes;
  }

  /**
   * Delegates to the parent implementation; overridden only to make the call synchronized.
   *
   * @param owner the intended owner for the token
   * @param renewerKerberosPrincipalName kerberos user
   * @return the string of the token
   * @throws TException when client fails
   */
  @Override
  public synchronized String getDelegationToken(String owner, String renewerKerberosPrincipalName) throws TException {
    final String token = super.getDelegationToken(owner, renewerKerberosPrincipalName);
    return token;
  }

}
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.hive.client;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.util.ImpersonationUtil;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;

/**
 * Static factory for {@link DrillHiveMetaStoreClient} instances. Not instantiable.
 */
public final class DrillHiveMetaStoreClientFactory {

  private DrillHiveMetaStoreClientFactory() {
  }

  /**
   * Creates a per-user DrillHiveMetaStoreClient. Intended for cases where:
   * 1. Drill impersonation is enabled and
   * 2. either storage (in remote HiveMetaStore server) or SQL standard based authorization (in Hive storage plugin)
   *    is enabled
   *
   * @param processUserMetaStoreClient MetaStoreClient of process user. Used for generating the delegation token when
   *                                   SASL (KERBEROS or custom SASL implementations) is enabled.
   * @param hiveConf Conf including authorization configuration
   * @param userName User who is trying to access the Hive metadata
   * @return instance of client
   * @throws DrillRuntimeException when any step of the client setup fails
   */
  public static DrillHiveMetaStoreClient createClientWithAuthz(final DrillHiveMetaStoreClient processUserMetaStoreClient,
      final HiveConf hiveConf, final String userName) {
    try {
      // Impersonation setting of the Hive storage plugin (not Drill impersonation). When it is off, RPC
      // communication with the Hive MetaStore server runs with the process user UGI credentials.
      final boolean hiveImpersonationEnabled = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS);
      final UserGroupInformation rpcUgi = hiveImpersonationEnabled
          ? ImpersonationUtil.createProxyUgi(userName)
          : ImpersonationUtil.getProcessUserUGI();

      final HiveConf clientConf;
      if (hiveImpersonationEnabled && hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL)) {
        // With SASL enabled a proxy user needs a delegation token. Currently HiveMetaStoreClient can create
        // client transport for proxy users only when the authentication mechanism is DIGEST (through use of
        // delegation tokens).
        final String token = processUserMetaStoreClient.getDelegationToken(userName, userName);
        try {
          Utils.setTokenStr(rpcUgi, token, DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN);
        } catch (IOException e) {
          throw new DrillRuntimeException("Couldn't setup delegation token in the UGI for Hive MetaStoreClient", e);
        }
        // Work on a copy so the token signature does not leak into the caller's conf.
        clientConf = new HiveConf(hiveConf);
        clientConf.set("hive.metastore.token.signature", DrillHiveMetaStoreClientWithAuthorization.DRILL2HMS_TOKEN);
      } else {
        clientConf = hiveConf;
      }

      final PrivilegedExceptionAction<DrillHiveMetaStoreClient> clientCreation =
          () -> new DrillHiveMetaStoreClientWithAuthorization(clientConf, rpcUgi, userName);
      return rpcUgi.doAs(clientCreation);
    } catch (final Exception e) {
      throw new DrillRuntimeException("Failure setting up HiveMetaStore client.", e);
    }
  }

  /**
   * Creates a DrillHiveMetaStoreClient that can be shared across multiple users. This is created when
   * impersonation is disabled.
   *
   * @param hiveConf hive properties set in Drill storage plugin
   * @return instance of client
   * @throws MetaException when initialization failed
   */
  public static DrillHiveMetaStoreClient createCloseableClientWithCaching(final HiveConf hiveConf)
      throws MetaException {
    final DrillHiveMetaStoreClient sharedClient = new DrillHiveMetaStoreClient(hiveConf);
    return sharedClient;
  }

}

0 comments on commit cf51aa7

Please sign in to comment.