HIVE-15312 : reduce logging in certain places (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
sershe-apache committed Dec 2, 2016
1 parent 98a25f2 commit 2f9728e
Showing 7 changed files with 112 additions and 25 deletions.
32 changes: 32 additions & 0 deletions common/src/java/org/apache/hive/common/util/Ref.java
@@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hive.common.util;

/** Reference to T. */
public final class Ref<T> {
public T value;

public Ref(T value) {
this.value = value;
}

public static <T> Ref<T> from(T t) {
return new Ref<T>(t);
}
}
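
The new Ref<T> class is a minimal mutable holder: a callee that receives a Ref can publish a value back to its caller, which a plain boolean parameter cannot do. A small usage sketch, with hypothetical names that are not part of the patch:

import org.apache.hive.common.util.Ref;

public class RefExample {
  // Hypothetical callee: records what it learned in the caller-supplied holder.
  static void probe(Ref<Boolean> supportsFastPath) {
    supportsFastPath.value = false;
  }

  public static void main(String[] args) {
    Ref<Boolean> supportsFastPath = new Ref<Boolean>(null); // null = not yet known
    probe(supportsFastPath);
    System.out.println("fast path supported: " + supportsFastPath.value); // prints false
  }
}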
@@ -838,7 +838,7 @@ protected void setResponseHeaders(HttpResponse response,
response.setHeader(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
response.setHeader(HttpHeaders.Values.KEEP_ALIVE, "timeout="
+ connectionKeepAliveTimeOut);
LOG.info("Content Length in shuffle : " + contentLength);
LOG.debug("Content Length in shuffle : " + contentLength);
}
}

@@ -1211,7 +1211,7 @@ private void preemptTasks(int forPriority, int numTasksToPreempt, String []poten
}
} else {
// No tasks qualify as preemptable
LOG.info("No tasks qualify as killable to schedule tasks at priority {}", forPriority);
LOG.debug("No tasks qualify as killable to schedule tasks at priority {}", forPriority);
break;
}
}
@@ -1602,8 +1602,8 @@ boolean hadCommFailure() {
boolean canAcceptTask() {
boolean result = !hadCommFailure && !disabled
&&(numSchedulableTasks == -1 || ((numSchedulableTasks - numScheduledTasks) > 0));
if (LOG.isInfoEnabled()) {
LOG.info("Node[" + serviceInstance.getHost() + ":" + serviceInstance.getRpcPort() + ", " +
if (LOG.isDebugEnabled()) {
LOG.debug("Node[" + serviceInstance.getHost() + ":" + serviceInstance.getRpcPort() + ", " +
serviceInstance.getWorkerIdentity() + "]: " +
"canAcceptTask={}, numScheduledTasks={}, numSchedulableTasks={}, hadCommFailure={}, disabled={}",
result, numScheduledTasks, numSchedulableTasks, hadCommFailure, disabled);
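The hunks above all make the same two moves: per-event messages drop from INFO to DEBUG, and messages built by string concatenation stay behind an isDebugEnabled() guard so the arguments are not constructed when the level is off. A minimal SLF4J sketch of both forms, with hypothetical names that are not code from this patch:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LogLevelExample {
  private static final Logger LOG = LoggerFactory.getLogger(LogLevelExample.class);

  void report(String host, int port, boolean canAccept) {
    // Parameterized form: the message is only formatted when DEBUG is enabled.
    LOG.debug("canAcceptTask={} for {}:{}", canAccept, host, port);

    // Guarded form: use it when building the message itself is expensive
    // (string concatenation, toString() on large objects, and so on).
    if (LOG.isDebugEnabled()) {
      LOG.debug("Node[" + host + ":" + port + "]: canAcceptTask=" + canAccept);
    }
  }
}
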
18 changes: 11 additions & 7 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2974,6 +2974,8 @@ public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScrat

// The alias may not have any path
Path path = null;
boolean hasLogged = false;
// Note: this copies the list because createDummyFileForEmptyPartition may modify the map.
for (Path file : new LinkedList<Path>(work.getPathToAliases().keySet())) {
List<String> aliases = work.getPathToAliases().get(file);
if (aliases.contains(alias)) {
@@ -2986,13 +2988,15 @@ public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScrat
}

pathsProcessed.add(path);

LOG.info("Adding input file " + path);
if (!skipDummy
&& isEmptyPath(job, path, ctx)) {
path = createDummyFileForEmptyPartition(path, job, work,
hiveScratchDir);

if (LOG.isDebugEnabled()) {
LOG.debug("Adding input file " + path);
} else if (!hasLogged) {
hasLogged = true;
LOG.info("Adding " + work.getPathToAliases().size()
+ " inputs; the first input is " + path);
}
if (!skipDummy && isEmptyPath(job, path, ctx)) {
path = createDummyFileForEmptyPartition(path, job, work, hiveScratchDir);
}
pathsToAdd.add(path);
}
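The getInputPaths() change above throttles per-path logging: every input path is still logged at DEBUG, but at INFO only a single summary line is emitted per alias, controlled by the hasLogged flag. A sketch of that log-once pattern in isolation, with hypothetical names rather than Hive's actual method:

import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LogOnceExample {
  private static final Logger LOG = LoggerFactory.getLogger(LogOnceExample.class);

  void addInputs(List<String> paths) {
    boolean hasLogged = false;
    for (String path : paths) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding input file " + path);          // full detail only at DEBUG
      } else if (!hasLogged) {
        hasLogged = true;                                 // one INFO summary per call
        LOG.info("Adding " + paths.size() + " inputs; the first input is " + path);
      }
      // ... register the path with the job here ...
    }
  }
}
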
32 changes: 27 additions & 5 deletions ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -42,6 +42,7 @@
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hive.common.util.Ref;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -763,19 +764,34 @@ public static Directory getAcidState(Path directory,
boolean useFileIds,
boolean ignoreEmptyFiles
) throws IOException {
return getAcidState(directory, conf, txnList, Ref.from(useFileIds), ignoreEmptyFiles);
}

public static Directory getAcidState(Path directory,
Configuration conf,
ValidTxnList txnList,
Ref<Boolean> useFileIds,
boolean ignoreEmptyFiles
) throws IOException {
FileSystem fs = directory.getFileSystem(conf);
// The following 'deltas' includes all kinds of delta files including insert & delete deltas.
final List<ParsedDelta> deltas = new ArrayList<ParsedDelta>();
List<ParsedDelta> working = new ArrayList<ParsedDelta>();
List<FileStatus> originalDirectories = new ArrayList<FileStatus>();
final List<FileStatus> obsolete = new ArrayList<FileStatus>();
List<HdfsFileStatusWithId> childrenWithId = null;
if (useFileIds) {
Boolean val = useFileIds.value;
if (val == null || val) {
try {
childrenWithId = SHIMS.listLocatedHdfsStatus(fs, directory, hiddenFileFilter);
if (val == null) {
useFileIds.value = true;
}
} catch (Throwable t) {
LOG.error("Failed to get files with ID; using regular API: " + t.getMessage());
useFileIds = false;
if (val == null && t instanceof UnsupportedOperationException) {
useFileIds.value = false;
}
}
}
TxnBase bestBase = new TxnBase();
@@ -995,15 +1011,21 @@ public Long getFileId() {
* @throws IOException
*/
private static void findOriginals(FileSystem fs, FileStatus stat,
List<HdfsFileStatusWithId> original, boolean useFileIds) throws IOException {
List<HdfsFileStatusWithId> original, Ref<Boolean> useFileIds) throws IOException {
assert stat.isDir();
List<HdfsFileStatusWithId> childrenWithId = null;
if (useFileIds) {
Boolean val = useFileIds.value;
if (val == null || val) {
try {
childrenWithId = SHIMS.listLocatedHdfsStatus(fs, stat.getPath(), hiddenFileFilter);
if (val == null) {
useFileIds.value = true;
}
} catch (Throwable t) {
LOG.error("Failed to get files with ID; using regular API: " + t.getMessage());
useFileIds = false;
if (val == null && t instanceof UnsupportedOperationException) {
useFileIds.value = false;
}
}
}
if (childrenWithId != null) {
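In AcidUtils the boolean useFileIds parameter becomes a Ref<Boolean> with three states: null means the file-ID API has not been probed yet, true means a probe succeeded, and false means a probe threw UnsupportedOperationException, so later calls skip it entirely. Any other failure only falls back to the plain listing for that one call. A sketch of that detection pattern, using hypothetical listWithIds/listPlain calls instead of the real HDFS shims:

import java.util.List;
import org.apache.hive.common.util.Ref;

public class FileIdProbeExample {
  // Hypothetical listing API standing in for SHIMS.listLocatedHdfsStatus and the plain FileSystem call.
  interface Lister {
    List<String> listWithIds(String dir) throws Exception;
    List<String> listPlain(String dir);
  }

  List<String> list(Lister fs, String dir, Ref<Boolean> useFileIds) {
    Boolean val = useFileIds.value;
    if (val == null || val) {                    // unknown yet, or known to work
      try {
        List<String> result = fs.listWithIds(dir);
        if (val == null) {
          useFileIds.value = true;               // first success: remember the API works
        }
        return result;
      } catch (Throwable t) {
        if (val == null && t instanceof UnsupportedOperationException) {
          useFileIds.value = false;              // API is missing: disable it for later calls
        }
        // otherwise fall through and use the plain listing for this call only
      }
    }
    return fs.listPlain(dir);
  }
}
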
41 changes: 34 additions & 7 deletions ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -106,6 +106,7 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.Ref;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcUtils;
@@ -1015,11 +1016,16 @@ static final class FileGenerator implements Callable<AcidDirInfo> {
private final Context context;
private final FileSystem fs;
private final Path dir;
private final boolean useFileIds;
private final Ref<Boolean> useFileIds;
private final UserGroupInformation ugi;

FileGenerator(Context context, FileSystem fs, Path dir, boolean useFileIds,
UserGroupInformation ugi) {
this(context, fs, dir, Ref.from(useFileIds), ugi);
}

FileGenerator(Context context, FileSystem fs, Path dir, Ref<Boolean> useFileIds,
UserGroupInformation ugi) {
this.context = context;
this.fs = fs;
this.dir = dir;
@@ -1082,16 +1088,23 @@ private AcidDirInfo callInternal() throws IOException {
} else {
// This is a normal insert delta, which only has insert events and hence all the files
// in this delta directory can be considered as a base.
if (useFileIds) {
Boolean val = useFileIds.value;
if (val == null || val) {
try {
List<HdfsFileStatusWithId> insertDeltaFiles =
SHIMS.listLocatedHdfsStatus(fs, parsedDelta.getPath(), AcidUtils.hiddenFileFilter);
for (HdfsFileStatusWithId fileId : insertDeltaFiles) {
baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.INSERT_DELTA));
}
if (val == null) {
useFileIds.value = true; // The call succeeded, so presumably the API is there.
}
continue; // move on to process to the next parsedDelta.
} catch (Throwable t) {
LOG.error("Failed to get files with ID; using regular API: " + t.getMessage());
if (val == null && t instanceof UnsupportedOperationException) {
useFileIds.value = false;
}
}
}
// Fall back to regular API and create statuses without ID.
@@ -1112,12 +1125,21 @@ private AcidDirInfo callInternal() throws IOException {
}

private List<HdfsFileStatusWithId> findBaseFiles(
Path base, boolean useFileIds) throws IOException {
if (useFileIds) {
Path base, Ref<Boolean> useFileIds) throws IOException {
Boolean val = useFileIds.value;
if (val == null || val) {
try {
return SHIMS.listLocatedHdfsStatus(fs, base, AcidUtils.hiddenFileFilter);
List<HdfsFileStatusWithId> result = SHIMS.listLocatedHdfsStatus(
fs, base, AcidUtils.hiddenFileFilter);
if (val == null) {
useFileIds.value = true; // The call succeeded, so presumably the API is there.
}
return result;
} catch (Throwable t) {
LOG.error("Failed to get files with ID; using regular API: " + t.getMessage());
if (val == null && t instanceof UnsupportedOperationException) {
useFileIds.value = false;
}
}
}

@@ -1542,8 +1564,13 @@ static List<OrcSplit> generateSplitsInfo(Configuration conf, Context context)
if (LOG.isInfoEnabled()) {
LOG.info("ORC pushdown predicate: " + context.sarg);
}
boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
boolean allowSyntheticFileIds = useFileIds && HiveConf.getBoolVar(
boolean useFileIdsConfig = HiveConf.getBoolVar(
conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
// Sharing this state assumes splits will succeed or fail to get it together (same FS).
// We also start with null and only set it to true on the first call, so we would only do
// the global-disable thing on the first failure w/the API error, not any random failure.
Ref<Boolean> useFileIds = Ref.from(useFileIdsConfig ? null : false);
boolean allowSyntheticFileIds = useFileIdsConfig && HiveConf.getBoolVar(
conf, ConfVars.HIVE_ORC_ALLOW_SYNTHETIC_FILE_ID_IN_SPLITS);
List<OrcSplit> splits = Lists.newArrayList();
List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList();
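generateSplitsInfo() seeds that shared flag from configuration: with HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS off the Ref starts at false and the file-ID API is never tried, and with it on the Ref starts at null so the first FileGenerator on the filesystem decides for all of them. A sketch of the seeding logic, with a hypothetical property name standing in for the ConfVars constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.common.util.Ref;

public class SplitFlagSeedExample {
  static Ref<Boolean> seedUseFileIds(Configuration conf) {
    // Hypothetical property name; the patch reads ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS.
    boolean useFileIdsConfig = conf.getBoolean("hive.orc.splits.include.file.id", true);
    // null  -> unknown yet: probe the file-ID API once and remember the answer
    // false -> disabled by config: never probe
    return Ref.from(useFileIdsConfig ? null : false);
  }
}
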
6 changes: 4 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -98,8 +98,10 @@ public void write(DataOutput out) throws IOException {
int additional = bos.size() - required;

out.write(bos.toByteArray());
LOG.info("Writing additional {} bytes to OrcSplit as payload. Required {} bytes.", additional,
required);
if (LOG.isTraceEnabled()) {
LOG.trace("Writing additional {} bytes to OrcSplit as payload. Required {} bytes.",
additional, required);
}
}

private void writeAdditionalPayload(final DataOutputStream out) throws IOException {
