Commit 82a9947

Merge branch 'master' of https://github.com/apache/spark into introduce-eslint

sarutak committed May 6, 2021
2 parents 1d67631 + c6d3f37
Showing 585 changed files with 27,869 additions and 21,023 deletions.
2 changes: 2 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE
@@ -8,6 +8,8 @@ Thanks for sending a pull request! Here are some tips for you:
6. If possible, provide a concise example to reproduce the issue for a faster review.
7. If you want to add a new configuration, please read the guideline first for naming configurations in
'core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala'.
8. If you want to add or modify an error message, please read the guideline first:
https://spark.apache.org/error-message-guidelines.html
-->

### What changes were proposed in this pull request?
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -91,7 +91,7 @@ jobs:
# To keep the directory structure and file permissions, tar them
# See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
echo "Preparing the benchmark results:"
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}.tar `git diff --name-only`
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}.tar `git diff --name-only` `git ls-files --others --exclude-standard`
- name: Upload benchmark results
uses: actions/upload-artifact@v2
with:
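The added `git ls-files --others --exclude-standard` term matters because freshly generated benchmark result files may be untracked, while `git diff --name-only` only lists modified tracked files. A minimal sketch of the difference (the scratch repository and file names here are illustrative):

```bash
# One modified tracked file, one brand-new untracked file.
git init -q demo && cd demo
echo base > tracked.txt && git add tracked.txt && git commit -qm init
echo change >> tracked.txt       # modified, tracked
echo results > untracked.txt     # new, untracked

git diff --name-only                      # prints: tracked.txt
git ls-files --others --exclude-standard  # prints: untracked.txt

# Concatenating both lists, as the workflow now does, archives everything:
tar -cvf results.tar `git diff --name-only` `git ls-files --others --exclude-standard`
```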
31 changes: 18 additions & 13 deletions .github/workflows/build_and_test.yml
@@ -493,19 +493,6 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-556111e35d400f56cb0625dc16e9063d54628320
- name: Checkout TPC-DS (SF=1) generated data repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: maropu/spark-tpcds-sf-1
ref: 556111e35d400f56cb0625dc16e9063d54628320
path: ./tpcds-sf-1
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
@@ -528,6 +515,24 @@
uses: actions/setup-java@v1
with:
java-version: 8
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: databricks/tpcds-kit
path: ./tpcds-kit
- name: Build tpcds-kit
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: cd tpcds-kit/tools && make OS=LINUX
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
- name: Run TPC-DS queries
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
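Two behavioral changes here: the cache key is now derived from `hashFiles(...TPCDSSchema.scala)`, so cached data is invalidated whenever the table schema changes rather than being pinned to a fixed data-repository commit, and on a cache miss the data is generated in-job instead of checked out. The miss path boils down to the following shell sequence (assembled from the steps above):

```bash
# Build the dsdgen binary from the databricks/tpcds-kit checkout.
cd tpcds-kit/tools && make OS=LINUX && cd ../..

# Generate the scale-factor-1 tables with Spark's own generator...
build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData \
  --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 \
  --scaleFactor 1 --numPartitions 1 --overwrite"

# ...and point the query suite at the generated data.
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
```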
4 changes: 3 additions & 1 deletion .gitignore
@@ -15,7 +15,9 @@
.ensime_cache/
.ensime_lucene
.generated-mima*
.idea/
# The star is required for the !.idea/vcs.xml negation below to work, see https://git-scm.com/docs/gitignore
.idea/*
!.idea/vcs.xml
.idea_modules/
.project
.pydevproject
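The comment above alludes to a Git rule worth spelling out: Git cannot re-include a file if a parent directory of that file is excluded, so `!.idea/vcs.xml` only takes effect when the ignore pattern matches the directory's contents (`.idea/*`) rather than the directory itself (`.idea/`). A quick way to verify this in a scratch repository:

```bash
git init -q demo && cd demo
mkdir .idea && touch .idea/vcs.xml

printf '.idea/\n!.idea/vcs.xml\n' > .gitignore
git check-ignore -v .idea/vcs.xml          # ignored: the .idea/ rule wins

printf '.idea/*\n!.idea/vcs.xml\n' > .gitignore
git check-ignore -v .idea/vcs.xml \
  || echo "vcs.xml re-included"            # exit 1: no longer ignored
```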
36 changes: 36 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

13 changes: 0 additions & 13 deletions LICENSE-binary
@@ -218,7 +218,6 @@ javax.jdo:jdo-api
joda-time:joda-time
net.sf.opencsv:opencsv
org.apache.derby:derby
org.ehcache:ehcache
org.objenesis:objenesis
org.roaringbitmap:RoaringBitmap
org.scalanlp:breeze-macros_2.12
@@ -261,7 +260,6 @@ net.sf.supercsv:super-csv
org.apache.arrow:arrow-format
org.apache.arrow:arrow-memory
org.apache.arrow:arrow-vector
org.apache.commons:commons-configuration2
org.apache.commons:commons-crypto
org.apache.commons:commons-lang3
org.apache.hadoop:hadoop-annotations
@@ -296,7 +294,6 @@ org.apache.kerby:kerby-config
org.apache.kerby:kerby-pkix
org.apache.kerby:kerby-util
org.apache.kerby:kerby-xdr
org.apache.kerby:token-provider
org.apache.orc:orc-core
org.apache.orc:orc-mapreduce
org.mortbay.jetty:jetty
@@ -316,19 +313,15 @@ com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider
com.fasterxml.jackson.module:jackson-module-jaxb-annotations
com.fasterxml.jackson.module:jackson-module-paranamer
com.fasterxml.jackson.module:jackson-module-scala_2.12
com.fasterxml.woodstox:woodstox-core
com.github.mifmif:generex
com.github.stephenc.jcip:jcip-annotations
com.google.code.findbugs:jsr305
com.google.code.gson:gson
com.google.flatbuffers:flatbuffers-java
com.google.guava:guava
com.google.inject:guice
com.google.inject.extensions:guice-servlet
com.nimbusds:nimbus-jose-jwt
com.twitter:parquet-hadoop-bundle
commons-cli:commons-cli
commons-daemon:commons-daemon
commons-dbcp:commons-dbcp
commons-io:commons-io
commons-lang:commons-lang
@@ -340,8 +333,6 @@ javax.inject:javax.inject
javax.validation:validation-api
log4j:apache-log4j-extras
log4j:log4j
net.minidev:accessors-smart
net.minidev:json-smart
net.sf.jpam:jpam
org.apache.avro:avro
org.apache.avro:avro-ipc
@@ -357,7 +348,6 @@ org.apache.directory.server:apacheds-i18n
org.apache.directory.server:apacheds-kerberos-codec
org.apache.htrace:htrace-core
org.apache.ivy:ivy
org.apache.geronimo.specs:geronimo-jcache_1.0_spec
org.apache.mesos:mesos
org.apache.parquet:parquet-column
org.apache.parquet:parquet-common
@@ -432,15 +422,13 @@ BSD 2-Clause
------------

com.github.luben:zstd-jni
dnsjava:dnsjava
javolution:javolution
com.esotericsoftware:kryo-shaded
com.esotericsoftware:minlog
com.esotericsoftware:reflectasm
com.google.protobuf:protobuf-java
org.codehaus.janino:commons-compiler
org.codehaus.janino:janino
org.codehaus.woodstox:stax2-api
jline:jline
org.jodd:jodd-core
com.github.wendykierp:JTransforms
@@ -457,7 +445,6 @@ org.antlr:stringtemplate
org.antlr:antlr4-runtime
antlr:antlr
com.github.fommil.netlib:core
com.google.re2j:re2j
com.thoughtworks.paranamer:paranamer
org.scala-lang:scala-compiler
org.scala-lang:scala-library
6 changes: 5 additions & 1 deletion R/README.md
@@ -17,10 +17,14 @@ export R_HOME=/home/username/R

#### Build Spark

Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) or [SBT](https://spark.apache.org/docs/latest/building-spark.html#building-with-sbt), and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run

```bash
# Maven
./build/mvn -DskipTests -Psparkr package

# SBT
./build/sbt -Psparkr package
```

#### Running sparkR
1 change: 1 addition & 0 deletions R/pkg/DESCRIPTION
@@ -20,6 +20,7 @@ Depends:
Suggests:
knitr,
rmarkdown,
markdown,
testthat,
e1071,
survival,
6 changes: 3 additions & 3 deletions R/pkg/tests/fulltests/test_mllib_classification.R
@@ -38,14 +38,14 @@ test_that("spark.svmLinear", {
expect_true(class(summary$coefficients[, 1]) == "numeric")

coefs <- summary$coefficients[, "Estimate"]
expected_coefs <- c(-0.06004978, -0.1563083, -0.460648, 0.2276626, 1.055085)
expected_coefs <- c(-6.8823988, -0.6154984, -1.5135447, 1.9694126, 3.3736856)
expect_true(all(abs(coefs - expected_coefs) < 0.1))

# Test prediction with string label
prediction <- predict(model, training)
expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "character")
expected <- c("versicolor", "versicolor", "versicolor", "virginica", "virginica",
"virginica", "virginica", "virginica", "virginica", "virginica")
expected <- c("versicolor", "versicolor", "versicolor", "versicolor", "versicolor",
"versicolor", "versicolor", "versicolor", "versicolor", "versicolor")
expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)

# Test model save and load
17 changes: 9 additions & 8 deletions build/mvn
@@ -31,7 +31,7 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
install_app() {
local remote_tarball="$1/$2"
local remote_tarball="$1"
local local_tarball="${_DIR}/$2"
local binary="${_DIR}/$3"

@@ -71,19 +71,20 @@ install_mvn() {
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
fi
if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='}

local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
local MIRROR_URL="${APACHE_MIRROR}/${FILE_PATH}?action=download"

if [ $(command -v curl) ]; then
local TEST_MIRROR_URL="${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
if ! curl -L --output /dev/null --silent --head --fail "$TEST_MIRROR_URL" ; then
if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}" ; then
# Fall back to archive.apache.org for older Maven
echo "Falling back to archive.apache.org to download Maven"
APACHE_MIRROR="https://archive.apache.org/dist"
MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
fi
fi

install_app \
"${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \
"${MIRROR_URL}" \
"apache-maven-${MVN_VERSION}-bin.tar.gz" \
"apache-maven-${MVN_VERSION}/bin/mvn"

@@ -102,7 +103,7 @@ install_scala() {
local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}

install_app \
"${TYPESAFE_MIRROR}/scala/${scala_version}" \
"${TYPESAFE_MIRROR}/scala/${scala_version}/scala-${scala_version}.tgz" \
"scala-${scala_version}.tgz" \
"scala-${scala_version}/bin/scala"

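The reworked download logic in `install_mvn` probes the dynamic closer.lua mirror with a HEAD request and falls back to archive.apache.org, which retains releases after they rotate off the mirror network. Isolated, and with the version hard-coded purely for illustration, the pattern looks like this:

```bash
MVN_VERSION=3.6.3   # illustrative; build/mvn resolves the pinned version itself
FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
MIRROR_URL="https://www.apache.org/dyn/closer.lua/${FILE_PATH}?action=download"

# HEAD-check the mirror URL; --fail makes curl exit non-zero on HTTP errors.
if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}"; then
  echo "Falling back to archive.apache.org to download Maven"
  MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
fi

curl -fL -o "apache-maven-${MVN_VERSION}-bin.tar.gz" "${MIRROR_URL}"
```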
8 changes: 5 additions & 3 deletions conf/log4j.properties.template
@@ -22,10 +22,12 @@ log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=WARN
log4j.logger.org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver=WARN

# Settings to quiet third party logs that are too verbose
log4j.logger.org.sparkproject.jetty=WARN
47 changes: 42 additions & 5 deletions core/src/main/resources/org/apache/spark/ui/static/stagepage.js
@@ -43,6 +43,23 @@ $.extend( $.fn.dataTable.ext.type.order, {
a = ConvertDurationString( a );
b = ConvertDurationString( b );
return ((a < b) ? 1 : ((a > b) ? -1 : 0));
},

"size-pre": function (data) {
var floatValue = parseFloat(data)
return isNaN(floatValue) ? 0 : floatValue;
},

"size-asc": function (a, b) {
a = parseFloat(a);
b = parseFloat(b);
return ((a < b) ? -1 : ((a > b) ? 1 : 0));
},

"size-desc": function (a, b) {
a = parseFloat(a);
b = parseFloat(b);
return ((a < b) ? 1 : ((a > b) ? -1 : 0));
}
} );

@@ -562,10 +579,27 @@ $(document).ready(function () {
}
],
"columnDefs": [
{ "visible": false, "targets": 15 },
{ "visible": false, "targets": 16 },
{ "visible": false, "targets": 17 },
{ "visible": false, "targets": 18 }
// SPARK-35087: columns typed "size" hold strings of the form 'size / records'
// and should sort in numerical order rather than lexicographical order by default.
// Each `targets` value is a column id in the #summary-executor-table defined in
// stagespage-template.html; if the relative position of the columns in that
// table changes, adjust the column indices here accordingly.
// Input Size / Records
{"type": "size", "targets": 9},
// Output Size / Records
{"type": "size", "targets": 10},
// Shuffle Read Size / Records
{"type": "size", "targets": 11},
// Shuffle Write Size / Records
{"type": "size", "targets": 12},
// Peak JVM Memory OnHeap / OffHeap
{"visible": false, "targets": 15},
// Peak Execution Memory OnHeap / OffHeap
{"visible": false, "targets": 16},
// Peak Storage Memory OnHeap / OffHeap
{"visible": false, "targets": 17},
// Peak Pool Memory Direct / Mapped
{"visible": false, "targets": 18}
],
"deferRender": true,
"order": [[0, "asc"]],
@@ -746,7 +780,7 @@ $(document).ready(function () {
"paging": true,
"info": true,
"processing": true,
"lengthMenu": [[20, 40, 60, 100, totalTasksToShow], [20, 40, 60, 100, "All"]],
"lengthMenu": [[20, 40, 60, 100, -1], [20, 40, 60, 100, "All"]],
"orderMulti": false,
"bAutoWidth": false,
"ajax": {
@@ -762,6 +796,9 @@
data.numTasks = totalTasksToShow;
data.columnIndexToSort = columnIndexToSort;
data.columnNameToSort = columnNameToSort;
if (data.length === -1) {
data.length = totalTasksToShow;
}
},
"dataSrc": function (jsons) {
var jsonStr = JSON.stringify(jsons);
