From a1d22c73581ff78e12f12e919e24bff29afd340f Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Thu, 27 Sep 2018 11:21:52 +0530 Subject: [PATCH 1/5] HIVE-20644: Avoid exposing sensitive infomation through a Hive Runtime exception (Ashutosh Bapat, reviewed by Sankar Hariappan) --- .../apache/hadoop/hive/ql/exec/FunctionRegistry.java | 7 +++++-- .../org/apache/hadoop/hive/ql/exec/MapOperator.java | 10 ++++++++-- .../org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java | 9 +++++++-- .../hadoop/hive/ql/exec/tez/ReduceRecordSource.java | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 0bc8d840d2fc..53bd6ab48cd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1115,8 +1115,11 @@ public static Object invoke(Method m, Object thisObject, Object... arguments) String detailedMsg = e instanceof java.lang.reflect.InvocationTargetException ? e.getCause().getMessage() : e.getMessage(); - throw new HiveException("Unable to execute method " + m + " with arguments " - + argumentString + ":" + detailedMsg, e); + // Log the arguments into a debug message for the ease of debugging. But when exposed through + // an error message they can leak sensitive information, even to the client application. + LOG.debug("Unable to execute method " + m + " with arguments " + + argumentString); + throw new HiveException("Unable to execute method " + m + ":" + detailedMsg, e); } return o; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index b9986d31cd47..42911ae86328 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -562,9 +562,15 @@ public void process(Writable value) throws HiveException { } if (row == null) { deserialize_error_count.set(deserialize_error_count.get() + 1); - throw new HiveException("Hive Runtime Error while processing writable " + message, e); + LOG.debug("Hive Runtime Error while processing writable " + message); + throw new HiveException("Hive Runtime Error while processing writable", e); } - throw new HiveException("Hive Runtime Error while processing row " + message, e); + + // Log the contents of the row that caused exception so that it's available for debugging. But + // when exposed through an error message it can leak sensitive information, even to the + // client application. + LOG.debug("Hive Runtime Error while processing row " + message); + throw new HiveException("Hive Runtime Error while processing row", e); } } rowsForwarded(childrenDone, 1); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index 829006d3754b..bce93d96ea97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -240,8 +240,13 @@ public void reduce(Object key, Iterator values, OutputCollector output, rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]"; } - throw new HiveException("Hive Runtime Error while processing row (tag=" - + tag + ") " + rowString, e); + + // Log the contents of the row that caused exception so that it's available for debugging. But + // when exposed through an error message it can leak sensitive information, even to the + // client application. + LOG.debug("Hive Runtime Error while processing row (tag=" + + tag + ") " + rowString); + throw new HiveException("Hive Runtime Error while processing row", e); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 5698639c5db5..cea6708693d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -369,8 +369,13 @@ public void next() throws HiveException { rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]"; } - throw new HiveException("Hive Runtime Error while processing row (tag=" - + tag + ") " + rowString, e); + + // Log the contents of the row that caused exception so that it's available for debugging. But + // when exposed through an error message it can leak sensitive information, even to the + // client application. + l4j.debug("Hive Runtime Error while processing row (tag=" + + tag + ") " + rowString); + throw new HiveException("Hive Runtime Error while processing row", e); } } } From 05659e20352df577ada0018f1050938ac92561fb Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Thu, 4 Oct 2018 12:53:53 +0530 Subject: [PATCH 2/5] HIVE-20644: Address following comments by Thejas Nair For completeness, maybe make similar update in ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java as well. Instead of DEBUG use TRACE, which is unlikely to be turned ON without due consideration and communicated to beeline. Ashutosh Bapat --- .../org/apache/hadoop/hive/ql/exec/FunctionRegistry.java | 2 +- .../java/org/apache/hadoop/hive/ql/exec/MapOperator.java | 4 ++-- .../org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java | 2 +- .../hive/ql/exec/spark/SparkReduceRecordHandler.java | 8 ++++++-- .../hadoop/hive/ql/exec/tez/ReduceRecordSource.java | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 53bd6ab48cd4..b7ca7c737054 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1117,7 +1117,7 @@ public static Object invoke(Method m, Object thisObject, Object... arguments) // Log the arguments into a debug message for the ease of debugging. But when exposed through // an error message they can leak sensitive information, even to the client application. - LOG.debug("Unable to execute method " + m + " with arguments " + LOG.trace("Unable to execute method " + m + " with arguments " + argumentString); throw new HiveException("Unable to execute method " + m + ":" + detailedMsg, e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 42911ae86328..1cbc27220727 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -562,14 +562,14 @@ public void process(Writable value) throws HiveException { } if (row == null) { deserialize_error_count.set(deserialize_error_count.get() + 1); - LOG.debug("Hive Runtime Error while processing writable " + message); + LOG.trace("Hive Runtime Error while processing writable " + message); throw new HiveException("Hive Runtime Error while processing writable", e); } // Log the contents of the row that caused exception so that it's available for debugging. But // when exposed through an error message it can leak sensitive information, even to the // client application. - LOG.debug("Hive Runtime Error while processing row " + message); + LOG.trace("Hive Runtime Error while processing row " + message); throw new HiveException("Hive Runtime Error while processing row", e); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index bce93d96ea97..e106bc914983 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -244,7 +244,7 @@ public void reduce(Object key, Iterator values, OutputCollector output, // Log the contents of the row that caused exception so that it's available for debugging. But // when exposed through an error message it can leak sensitive information, even to the // client application. - LOG.debug("Hive Runtime Error while processing row (tag=" + LOG.trace("Hive Runtime Error while processing row (tag=" + tag + ") " + rowString); throw new HiveException("Hive Runtime Error while processing row", e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 6a7e1dfa59eb..8f34f27c48a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -408,8 +408,12 @@ private boolean processKeyValues(Iterator values, byte tag) throws HiveEx rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]"; } - throw new HiveException("Error while processing row (tag=" - + tag + ") " + rowString, e); + + // Log contents of the row which caused exception so that it's available for debugging. But + // when exposed through an error message it can leak sensitive information, even to the + // client application. + LOG.trace("Hive exception while processing row (tag=" + tag + ") " + rowString); + throw new HiveException("Error while processing row ", e); } } return true; // give me more diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index cea6708693d0..72446afedadd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -373,7 +373,7 @@ public void next() throws HiveException { // Log the contents of the row that caused exception so that it's available for debugging. But // when exposed through an error message it can leak sensitive information, even to the // client application. - l4j.debug("Hive Runtime Error while processing row (tag=" + l4j.trace("Hive Runtime Error while processing row (tag=" + tag + ") " + rowString); throw new HiveException("Hive Runtime Error while processing row", e); } From 24759c56f94ce7232e1d66fca8e492a111da090e Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Thu, 4 Oct 2018 13:08:01 +0530 Subject: [PATCH 3/5] HIVE-20644: Additional changes per Thejas's following comment For completeness, maybe make similar update in ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java as well. The change in the previous commit to the above file is required without any doubt since it exposes the row. But the changes in this commit are a bit dubious since I am not sure if the key or value contains any sensitive information. Ashutosh Bapat --- .../exec/spark/SparkReduceRecordHandler.java | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 8f34f27c48a2..b8b5888d96c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -346,11 +346,14 @@ public void processRow(Object key, Iterator values) throws IOException { try { keyObject = inputKeyDeserializer.deserialize(keyWritable); } catch (Exception e) { - throw new HiveException( - "Hive Runtime Error: Unable to deserialize reduce input key from " + // Log the input key which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. + LOG.trace("Hive Runtime Error: Unable to deserialize reduce input key from " + Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize()) + " with properties " - + keyTableDesc.getProperties(), e); + + keyTableDesc.getProperties()); + throw new HiveException("Hive Runtime Error: Unable to deserialize reduce input key ", e); } groupKey.set(keyWritable.get(), 0, keyWritable.getSize()); @@ -384,13 +387,16 @@ private boolean processKeyValues(Iterator values, byte tag) throws HiveEx try { valueObject[tag] = inputValueDeserializer[tag].deserialize(valueWritable); } catch (SerDeException e) { - throw new HiveException( - "Hive Runtime Error: Unable to deserialize reduce input value (tag=" + // Log the input value which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. + LOG.trace("Hive Runtime Error: Unable to deserialize reduce input value (tag=" + tag + ") from " + Utilities.formatBinaryString(valueWritable.get(), 0, valueWritable.getSize()) + " with properties " - + valueTableDesc[tag].getProperties(), e); + + valueTableDesc[tag].getProperties()); + throw new HiveException("Hive Runtime Error: Unable to deserialize reduce input value ", e); } row.clear(); row.add(keyObject); @@ -574,10 +580,14 @@ private Object deserializeValue(BytesWritable valueWritable, byte tag) throws Hi try { return inputValueDeserializer[tag].deserialize(valueWritable); } catch (SerDeException e) { - throw new HiveException("Error: Unable to deserialize reduce input value (tag=" + // Log the input value which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. + LOG.trace("Error: Unable to deserialize reduce input value (tag=" + tag + ") from " + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength()) - + " with properties " + valueTableDesc[tag].getProperties(), e); + + " with properties " + valueTableDesc[tag].getProperties()); + throw new HiveException("Error: Unable to deserialize reduce input value ", e); } } From d73bd4c8f01f71bb567d0878d742ab6096009c63 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Fri, 5 Oct 2018 09:58:40 +0530 Subject: [PATCH 4/5] HIVE-20644: Fixed checkstyle and whitespace errors reported by Hive QA --- .../exec/spark/SparkReduceRecordHandler.java | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index b8b5888d96c5..398d1c69c4dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -346,9 +346,9 @@ public void processRow(Object key, Iterator values) throws IOException { try { keyObject = inputKeyDeserializer.deserialize(keyWritable); } catch (Exception e) { - // Log the input key which caused exception so that it's available for debugging. But when - // exposed through an error message it can leak sensitive information, even to the client - // application. + // Log the input key which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. LOG.trace("Hive Runtime Error: Unable to deserialize reduce input key from " + Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize()) + " with properties " @@ -387,9 +387,9 @@ private boolean processKeyValues(Iterator values, byte tag) throws HiveEx try { valueObject[tag] = inputValueDeserializer[tag].deserialize(valueWritable); } catch (SerDeException e) { - // Log the input value which caused exception so that it's available for debugging. But when - // exposed through an error message it can leak sensitive information, even to the client - // application. + // Log the input value which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. LOG.trace("Hive Runtime Error: Unable to deserialize reduce input value (tag=" + tag + ") from " @@ -415,9 +415,9 @@ private boolean processKeyValues(Iterator values, byte tag) throws HiveEx + StringUtils.stringifyException(e2) + " ]"; } - // Log contents of the row which caused exception so that it's available for debugging. But - // when exposed through an error message it can leak sensitive information, even to the - // client application. + // Log contents of the row which caused exception so that it's available for debugging. But + // when exposed through an error message it can leak sensitive information, even to the + // client application. LOG.trace("Hive exception while processing row (tag=" + tag + ") " + rowString); throw new HiveException("Error while processing row ", e); } @@ -583,10 +583,10 @@ private Object deserializeValue(BytesWritable valueWritable, byte tag) throws Hi // Log the input value which caused exception so that it's available for debugging. But when // exposed through an error message it can leak sensitive information, even to the client // application. - LOG.trace("Error: Unable to deserialize reduce input value (tag=" - + tag + ") from " - + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength()) - + " with properties " + valueTableDesc[tag].getProperties()); + LOG.trace("Error: Unable to deserialize reduce input value (tag=" + tag + ") from " + + Utilities.formatBinaryString(valueWritable.getBytes(), 0, + valueWritable.getLength()) + + " with properties " + valueTableDesc[tag].getProperties()); throw new HiveException("Error: Unable to deserialize reduce input value ", e); } } From fc6e36f100bdf693023241624c9b21f000a362da Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Mon, 8 Oct 2018 14:36:06 +0530 Subject: [PATCH 5/5] HIVE-20644: Fixed some more checkstyle and whitespace errors reported by Hive QA. --- .../hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 398d1c69c4dc..20e7ea0f4e8d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -580,9 +580,9 @@ private Object deserializeValue(BytesWritable valueWritable, byte tag) throws Hi try { return inputValueDeserializer[tag].deserialize(valueWritable); } catch (SerDeException e) { - // Log the input value which caused exception so that it's available for debugging. But when - // exposed through an error message it can leak sensitive information, even to the client - // application. + // Log the input value which caused exception so that it's available for debugging. But when + // exposed through an error message it can leak sensitive information, even to the client + // application. LOG.trace("Error: Unable to deserialize reduce input value (tag=" + tag + ") from " + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength()) +