Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into ban-Class.forName
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshRosen committed Jul 14, 2015
2 parents c0b7885 + 408b384 commit e3e96f7
Show file tree
Hide file tree
Showing 21 changed files with 244 additions and 563 deletions.
45 changes: 22 additions & 23 deletions core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
</td> +: getFormattedTimeQuantiles(serializationTimes)

val gettingResultTimes = validTasks.map { case TaskUIData(info, _, _) =>
getGettingResultTime(info).toDouble
getGettingResultTime(info, currentTime).toDouble
}
val gettingResultQuantiles =
<td>
Expand All @@ -346,7 +346,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
// machine and to send back the result (but not the time to fetch the task result,
// if it needed to be fetched from the block manager on the worker).
val schedulerDelays = validTasks.map { case TaskUIData(info, metrics, _) =>
getSchedulerDelay(info, metrics.get).toDouble
getSchedulerDelay(info, metrics.get, currentTime).toDouble
}
val schedulerDelayTitle = <td><span data-toggle="tooltip"
title={ToolTips.SCHEDULER_DELAY} data-placement="right">Scheduler Delay</span></td>
Expand Down Expand Up @@ -544,7 +544,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val serializationTimeProportion = toProportion(serializationTime)
val deserializationTime = metricsOpt.map(_.executorDeserializeTime).getOrElse(0L)
val deserializationTimeProportion = toProportion(deserializationTime)
val gettingResultTime = getGettingResultTime(taskUIData.taskInfo)
val gettingResultTime = getGettingResultTime(taskUIData.taskInfo, currentTime)
val gettingResultTimeProportion = toProportion(gettingResultTime)
val schedulerDelay = totalExecutionTime -
(executorComputingTime + shuffleReadTime + shuffleWriteTime +
Expand Down Expand Up @@ -685,11 +685,11 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
else metrics.map(_.executorRunTime).getOrElse(1L)
val formatDuration = if (info.status == "RUNNING") UIUtils.formatDuration(duration)
else metrics.map(m => UIUtils.formatDuration(m.executorRunTime)).getOrElse("")
val schedulerDelay = metrics.map(getSchedulerDelay(info, _)).getOrElse(0L)
val schedulerDelay = metrics.map(getSchedulerDelay(info, _, currentTime)).getOrElse(0L)
val gcTime = metrics.map(_.jvmGCTime).getOrElse(0L)
val taskDeserializationTime = metrics.map(_.executorDeserializeTime).getOrElse(0L)
val serializationTime = metrics.map(_.resultSerializationTime).getOrElse(0L)
val gettingResultTime = getGettingResultTime(info)
val gettingResultTime = getGettingResultTime(info, currentTime)

val maybeAccumulators = info.accumulables
val accumulatorsReadable = maybeAccumulators.map{acc => s"${acc.name}: ${acc.update.get}"}
Expand Down Expand Up @@ -852,32 +852,31 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
<td>{errorSummary}{details}</td>
}

/**
 * Time the task has spent in the GETTING_RESULT state (fetching its result
 * back to the driver), in milliseconds.
 *
 * @param info task whose result-fetch time is being computed
 * @param currentTime a single clock snapshot taken by the caller, so that
 *                    every row rendered in one page view uses a consistent
 *                    "now" instead of calling System.currentTimeMillis per row
 * @return elapsed fetch time in ms, or 0 if the task never entered the
 *         GETTING_RESULT state
 */
private def getGettingResultTime(info: TaskInfo, currentTime: Long): Long = {
  if (info.gettingResult) {
    if (info.finished) {
      info.finishTime - info.gettingResultTime
    } else {
      // The task is still fetching the result; measure against the shared snapshot.
      currentTime - info.gettingResultTime
    }
  } else {
    // The task never reached the GETTING_RESULT state.
    0L
  }
}

/**
 * Scheduler delay for a finished task, in milliseconds: the portion of the
 * task's wall-clock time not accounted for by executor work (run time plus
 * serialization/deserialization overhead) or by fetching the result.
 *
 * @param info task timing information (launchTime, finishTime, status)
 * @param metrics executor-side metrics; only meaningful once the task finished
 * @param currentTime clock snapshot forwarded to getGettingResultTime so all
 *                    rows in one page view share a consistent "now"
 * @return the delay in ms, clamped at 0 so clock skew between driver and
 *         executor can never yield a negative value; 0 for a task that has
 *         not finished, since metrics like executorRunTime are not yet
 *         available
 */
private def getSchedulerDelay(info: TaskInfo, metrics: TaskMetrics, currentTime: Long): Long = {
  if (info.finished) {
    val totalExecutionTime = info.finishTime - info.launchTime
    val executorOverhead = (metrics.executorDeserializeTime +
      metrics.resultSerializationTime)
    math.max(
      0,
      totalExecutionTime - metrics.executorRunTime - executorOverhead -
        getGettingResultTime(info, currentTime))
  } else {
    // The task is still running and the metrics like executorRunTime are not available.
    0L
  }
}
}
57 changes: 8 additions & 49 deletions dev/lint-python
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
PATHS_TO_CHECK="./python/pyspark/ ./ec2/spark_ec2.py ./examples/src/main/python/ ./dev/sparktestsupport"
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py"
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"
PYTHON_LINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/python-lint-report.txt"

cd "$SPARK_ROOT_DIR"

# compileall: https://docs.python.org/2/library/compileall.html
python -B -m compileall -q -l $PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
python -B -m compileall -q -l $PATHS_TO_CHECK > "$PYTHON_LINT_REPORT_PATH"
compile_status="${PIPESTATUS[0]}"

# Get pep8 at runtime so that we don't rely on it being installed on the build server.
Expand All @@ -49,36 +47,11 @@ if [ ! -e "$PEP8_SCRIPT_PATH" ]; then
fi
fi

# Easy install pylint in /dev/pylint. To easy_install into a directory, the PYTHONPATH should
# be set to the directory.
# dev/pylint should be appended to the PATH variable as well.
# Jenkins by default installs the pylint3 version, so for now this just checks the code quality
# of python3.
export "PYTHONPATH=$SPARK_ROOT_DIR/dev/pylint"
export "PYLINT_HOME=$PYTHONPATH"
export "PATH=$PYTHONPATH:$PATH"

if [ ! -d "$PYLINT_HOME" ]; then
mkdir "$PYLINT_HOME"
# Redirect the annoying pylint installation output.
easy_install -d "$PYLINT_HOME" pylint==1.4.4 &>> "$PYLINT_INSTALL_INFO"
easy_install_status="$?"

if [ "$easy_install_status" -ne 0 ]; then
echo "Unable to install pylint locally in \"$PYTHONPATH\"."
cat "$PYLINT_INSTALL_INFO"
exit "$easy_install_status"
fi

rm "$PYLINT_INSTALL_INFO"

fi

# There is no need to write this output to a file
#+ first, but we do so so that the check status can
#+ be output before the report, like with the
#+ scalastyle and RAT checks.
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PYTHON_LINT_REPORT_PATH"
pep8_status="${PIPESTATUS[0]}"

if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then
Expand All @@ -88,27 +61,13 @@ else
fi

if [ "$lint_status" -ne 0 ]; then
echo "PEP8 checks failed."
cat "$PEP8_REPORT_PATH"
else
echo "PEP8 checks passed."
fi

rm "$PEP8_REPORT_PATH"

for to_be_checked in "$PATHS_TO_CHECK"
do
pylint --rcfile="$SPARK_ROOT_DIR/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH"
done

if [ "${PIPESTATUS[0]}" -ne 0 ]; then
lint_status=1
echo "Pylint checks failed."
cat "$PYLINT_REPORT_PATH"
echo "Python lint checks failed."
cat "$PYTHON_LINT_REPORT_PATH"
else
echo "Pylint checks passed."
echo "Python lint checks passed."
fi

rm "$PYLINT_REPORT_PATH"
# rm "$PEP8_SCRIPT_PATH"
rm "$PYTHON_LINT_REPORT_PATH"

exit "$lint_status"
10 changes: 5 additions & 5 deletions docker/spark-test/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

FROM ubuntu:precise

RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list

# Upgrade package index
RUN apt-get update

# install a few other useful packages plus Open Jdk 7
RUN apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server
# Remove unneeded /var/lib/apt/lists/* after install to reduce the
# docker image size (by ~30MB)
RUN apt-get update && \
apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server && \
rm -rf /var/lib/apt/lists/*

ENV SCALA_VERSION 2.10.4
ENV CDH_VERSION cdh4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ private[shared] object SharedParamsCodeGen {

s"""
|/**
| * (private[ml]) Trait for shared param $name$defaultValueDoc.
| * Trait for shared param $name$defaultValueDoc.
| */
|private[ml] trait Has$Name extends Params {
|
Expand Down Expand Up @@ -173,7 +173,6 @@ private[shared] object SharedParamsCodeGen {
|package org.apache.spark.ml.param.shared
|
|import org.apache.spark.ml.param._
|import org.apache.spark.util.Utils
|
|// DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.
|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@
package org.apache.spark.ml.param.shared

import org.apache.spark.ml.param._
import org.apache.spark.util.Utils

// DO NOT MODIFY THIS FILE! It was generated by SharedParamsCodeGen.

// scalastyle:off

/**
* (private[ml]) Trait for shared param regParam.
* Trait for shared param regParam.
*/
private[ml] trait HasRegParam extends Params {

Expand All @@ -40,7 +39,7 @@ private[ml] trait HasRegParam extends Params {
}

/**
* (private[ml]) Trait for shared param maxIter.
* Trait for shared param maxIter.
*/
private[ml] trait HasMaxIter extends Params {

Expand All @@ -55,7 +54,7 @@ private[ml] trait HasMaxIter extends Params {
}

/**
* (private[ml]) Trait for shared param featuresCol (default: "features").
* Trait for shared param featuresCol (default: "features").
*/
private[ml] trait HasFeaturesCol extends Params {

Expand All @@ -72,7 +71,7 @@ private[ml] trait HasFeaturesCol extends Params {
}

/**
* (private[ml]) Trait for shared param labelCol (default: "label").
* Trait for shared param labelCol (default: "label").
*/
private[ml] trait HasLabelCol extends Params {

Expand All @@ -89,7 +88,7 @@ private[ml] trait HasLabelCol extends Params {
}

/**
* (private[ml]) Trait for shared param predictionCol (default: "prediction").
* Trait for shared param predictionCol (default: "prediction").
*/
private[ml] trait HasPredictionCol extends Params {

Expand All @@ -106,7 +105,7 @@ private[ml] trait HasPredictionCol extends Params {
}

/**
* (private[ml]) Trait for shared param rawPredictionCol (default: "rawPrediction").
* Trait for shared param rawPredictionCol (default: "rawPrediction").
*/
private[ml] trait HasRawPredictionCol extends Params {

Expand All @@ -123,7 +122,7 @@ private[ml] trait HasRawPredictionCol extends Params {
}

/**
* (private[ml]) Trait for shared param probabilityCol (default: "probability").
* Trait for shared param probabilityCol (default: "probability").
*/
private[ml] trait HasProbabilityCol extends Params {

Expand All @@ -140,7 +139,7 @@ private[ml] trait HasProbabilityCol extends Params {
}

/**
* (private[ml]) Trait for shared param threshold.
* Trait for shared param threshold.
*/
private[ml] trait HasThreshold extends Params {

Expand All @@ -155,7 +154,7 @@ private[ml] trait HasThreshold extends Params {
}

/**
* (private[ml]) Trait for shared param inputCol.
* Trait for shared param inputCol.
*/
private[ml] trait HasInputCol extends Params {

Expand All @@ -170,7 +169,7 @@ private[ml] trait HasInputCol extends Params {
}

/**
* (private[ml]) Trait for shared param inputCols.
* Trait for shared param inputCols.
*/
private[ml] trait HasInputCols extends Params {

Expand All @@ -185,7 +184,7 @@ private[ml] trait HasInputCols extends Params {
}

/**
* (private[ml]) Trait for shared param outputCol (default: uid + "__output").
* Trait for shared param outputCol (default: uid + "__output").
*/
private[ml] trait HasOutputCol extends Params {

Expand All @@ -202,7 +201,7 @@ private[ml] trait HasOutputCol extends Params {
}

/**
* (private[ml]) Trait for shared param checkpointInterval.
* Trait for shared param checkpointInterval.
*/
private[ml] trait HasCheckpointInterval extends Params {

Expand All @@ -217,7 +216,7 @@ private[ml] trait HasCheckpointInterval extends Params {
}

/**
* (private[ml]) Trait for shared param fitIntercept (default: true).
* Trait for shared param fitIntercept (default: true).
*/
private[ml] trait HasFitIntercept extends Params {

Expand All @@ -234,7 +233,7 @@ private[ml] trait HasFitIntercept extends Params {
}

/**
* (private[ml]) Trait for shared param standardization (default: true).
* Trait for shared param standardization (default: true).
*/
private[ml] trait HasStandardization extends Params {

Expand All @@ -251,7 +250,7 @@ private[ml] trait HasStandardization extends Params {
}

/**
* (private[ml]) Trait for shared param seed (default: this.getClass.getName.hashCode.toLong).
* Trait for shared param seed (default: this.getClass.getName.hashCode.toLong).
*/
private[ml] trait HasSeed extends Params {

Expand All @@ -268,7 +267,7 @@ private[ml] trait HasSeed extends Params {
}

/**
* (private[ml]) Trait for shared param elasticNetParam.
* Trait for shared param elasticNetParam.
*/
private[ml] trait HasElasticNetParam extends Params {

Expand All @@ -283,7 +282,7 @@ private[ml] trait HasElasticNetParam extends Params {
}

/**
* (private[ml]) Trait for shared param tol.
* Trait for shared param tol.
*/
private[ml] trait HasTol extends Params {

Expand All @@ -298,7 +297,7 @@ private[ml] trait HasTol extends Params {
}

/**
* (private[ml]) Trait for shared param stepSize.
* Trait for shared param stepSize.
*/
private[ml] trait HasStepSize extends Params {

Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
<yarn.version>${hadoop.version}</yarn.version>
<hbase.version>0.98.7-hadoop2</hbase.version>
<hbase.artifact>hbase</hbase.artifact>
<flume.version>1.4.0</flume.version>
<flume.version>1.6.0</flume.version>
<zookeeper.version>3.4.5</zookeeper.version>
<curator.version>2.4.0</curator.version>
<hive.group>org.spark-project.hive</hive.group>
Expand Down
Loading

0 comments on commit e3e96f7

Please sign in to comment.