Commit 82a9947

Merge branch 'master' of https://github.com/apache/spark into introduce-eslint

sarutak committed May 6, 2021
2 parents 1d67631 + c6d3f37
Showing 585 changed files with 27,869 additions and 21,023 deletions.
2 changes: 2 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE
@@ -8,6 +8,8 @@ Thanks for sending a pull request! Here are some tips for you:
6. If possible, provide a concise example to reproduce the issue for a faster review.
7. If you want to add a new configuration, please read the guideline first for naming configurations in
'core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala'.
8. If you want to add or modify an error message, please read the guideline first:
https://spark.apache.org/error-message-guidelines.html
-->

### What changes were proposed in this pull request?
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -91,7 +91,7 @@ jobs:
# To keep the directory structure and file permissions, tar them
# See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
echo "Preparing the benchmark results:"
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}.tar `git diff --name-only`
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}.tar `git diff --name-only` `git ls-files --others --exclude-standard`
- name: Upload benchmark results
uses: actions/upload-artifact@v2
with:
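The added `git ls-files --others --exclude-standard` term matters because freshly generated benchmark result files may be untracked, while `git diff --name-only` only lists modified tracked files. A minimal sketch of the difference (the scratch repository and file names here are illustrative):

```bash
# One modified tracked file, one brand-new untracked file.
git init -q demo && cd demo
echo base > tracked.txt && git add tracked.txt && git commit -qm init
echo change >> tracked.txt       # modified, tracked
echo results > untracked.txt     # new, untracked

git diff --name-only                      # prints: tracked.txt
git ls-files --others --exclude-standard  # prints: untracked.txt

# Concatenating both lists, as the workflow now does, archives everything:
tar -cvf results.tar `git diff --name-only` `git ls-files --others --exclude-standard`
```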
31 changes: 18 additions & 13 deletions .github/workflows/build_and_test.yml
@@ -493,19 +493,6 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-556111e35d400f56cb0625dc16e9063d54628320
- name: Checkout TPC-DS (SF=1) generated data repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: maropu/spark-tpcds-sf-1
ref: 556111e35d400f56cb0625dc16e9063d54628320
path: ./tpcds-sf-1
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
@@ -528,6 +515,24 @@
uses: actions/setup-java@v1
with:
java-version: 8
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: databricks/tpcds-kit
path: ./tpcds-kit
- name: Build tpcds-kit
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: cd tpcds-kit/tools && make OS=LINUX
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
- name: Run TPC-DS queries
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
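Two behavioral changes here: the cache key is now derived from `hashFiles(...TPCDSSchema.scala)`, so cached data is invalidated whenever the table schema changes rather than being pinned to a fixed data-repository commit, and on a cache miss the data is generated in-job instead of checked out. The miss path boils down to the following shell sequence (assembled from the steps above):

```bash
# Build the dsdgen binary from the databricks/tpcds-kit checkout.
cd tpcds-kit/tools && make OS=LINUX && cd ../..

# Generate the scale-factor-1 tables with Spark's own generator...
build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData \
  --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 \
  --scaleFactor 1 --numPartitions 1 --overwrite"

# ...and point the query suite at the generated data.
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
```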
4 changes: 3 additions & 1 deletion .gitignore
@@ -15,7 +15,9 @@
.ensime_cache/
.ensime_lucene
.generated-mima*
.idea/
# The star is required for the !.idea/vcs.xml negation below to work, see https://git-scm.com/docs/gitignore
.idea/*
!.idea/vcs.xml
.idea_modules/
.project
.pydevproject
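The comment above alludes to a Git rule worth spelling out: Git cannot re-include a file if a parent directory of that file is excluded, so `!.idea/vcs.xml` only takes effect when the ignore pattern matches the directory's contents (`.idea/*`) rather than the directory itself (`.idea/`). A quick way to verify this in a scratch repository:

```bash
git init -q demo && cd demo
mkdir .idea && touch .idea/vcs.xml

printf '.idea/\n!.idea/vcs.xml\n' > .gitignore
git check-ignore -v .idea/vcs.xml          # ignored: the .idea/ rule wins

printf '.idea/*\n!.idea/vcs.xml\n' > .gitignore
git check-ignore -v .idea/vcs.xml \
  || echo "vcs.xml re-included"            # exit 1: no longer ignored
```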
36 changes: 36 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

13 changes: 0 additions & 13 deletions LICENSE-binary
@@ -218,7 +218,6 @@ javax.jdo:jdo-api
joda-time:joda-time
net.sf.opencsv:opencsv
org.apache.derby:derby
org.ehcache:ehcache
org.objenesis:objenesis
org.roaringbitmap:RoaringBitmap
org.scalanlp:breeze-macros_2.12
@@ -261,7 +260,6 @@ net.sf.supercsv:super-csv
org.apache.arrow:arrow-format
org.apache.arrow:arrow-memory
org.apache.arrow:arrow-vector
org.apache.commons:commons-configuration2
org.apache.commons:commons-crypto
org.apache.commons:commons-lang3
org.apache.hadoop:hadoop-annotations
@@ -296,7 +294,6 @@ org.apache.kerby:kerby-config
org.apache.kerby:kerby-pkix
org.apache.kerby:kerby-util
org.apache.kerby:kerby-xdr
org.apache.kerby:token-provider
org.apache.orc:orc-core
org.apache.orc:orc-mapreduce
org.mortbay.jetty:jetty
@@ -316,19 +313,15 @@ com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider
com.fasterxml.jackson.module:jackson-module-jaxb-annotations
com.fasterxml.jackson.module:jackson-module-paranamer
com.fasterxml.jackson.module:jackson-module-scala_2.12
com.fasterxml.woodstox:woodstox-core
com.github.mifmif:generex
com.github.stephenc.jcip:jcip-annotations
com.google.code.findbugs:jsr305
com.google.code.gson:gson
com.google.flatbuffers:flatbuffers-java
com.google.guava:guava
com.google.inject:guice
com.google.inject.extensions:guice-servlet
com.nimbusds:nimbus-jose-jwt
com.twitter:parquet-hadoop-bundle
commons-cli:commons-cli
commons-daemon:commons-daemon
commons-dbcp:commons-dbcp
commons-io:commons-io
commons-lang:commons-lang
@@ -340,8 +333,6 @@ javax.inject:javax.inject
javax.validation:validation-api
log4j:apache-log4j-extras
log4j:log4j
net.minidev:accessors-smart
net.minidev:json-smart
net.sf.jpam:jpam
org.apache.avro:avro
org.apache.avro:avro-ipc
@@ -357,7 +348,6 @@ org.apache.directory.server:apacheds-i18n
org.apache.directory.server:apacheds-kerberos-codec
org.apache.htrace:htrace-core
org.apache.ivy:ivy
org.apache.geronimo.specs:geronimo-jcache_1.0_spec
org.apache.mesos:mesos
org.apache.parquet:parquet-column
org.apache.parquet:parquet-common
@@ -432,15 +422,13 @@ BSD 2-Clause
------------

com.github.luben:zstd-jni
dnsjava:dnsjava
javolution:javolution
com.esotericsoftware:kryo-shaded
com.esotericsoftware:minlog
com.esotericsoftware:reflectasm
com.google.protobuf:protobuf-java
org.codehaus.janino:commons-compiler
org.codehaus.janino:janino
org.codehaus.woodstox:stax2-api
jline:jline
org.jodd:jodd-core
com.github.wendykierp:JTransforms
@@ -457,7 +445,6 @@ org.antlr:stringtemplate
org.antlr:antlr4-runtime
antlr:antlr
com.github.fommil.netlib:core
com.google.re2j:re2j
com.thoughtworks.paranamer:paranamer
org.scala-lang:scala-compiler
org.scala-lang:scala-library
6 changes: 5 additions & 1 deletion R/README.md
@@ -17,10 +17,14 @@ export R_HOME=/home/username/R

#### Build Spark

Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) or [SBT](https://spark.apache.org/docs/latest/building-spark.html#building-with-sbt), and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run

```bash
# Maven
./build/mvn -DskipTests -Psparkr package

# SBT
./build/sbt -Psparkr package
```

#### Running sparkR
1 change: 1 addition & 0 deletions R/pkg/DESCRIPTION
@@ -20,6 +20,7 @@ Depends:
Suggests:
knitr,
rmarkdown,
markdown,
testthat,
e1071,
survival,
6 changes: 3 additions & 3 deletions R/pkg/tests/fulltests/test_mllib_classification.R
@@ -38,14 +38,14 @@ test_that("spark.svmLinear", {
expect_true(class(summary$coefficients[, 1]) == "numeric")

coefs <- summary$coefficients[, "Estimate"]
expected_coefs <- c(-0.06004978, -0.1563083, -0.460648, 0.2276626, 1.055085)
expected_coefs <- c(-6.8823988, -0.6154984, -1.5135447, 1.9694126, 3.3736856)
expect_true(all(abs(coefs - expected_coefs) < 0.1))

# Test prediction with string label
prediction <- predict(model, training)
expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "character")
expected <- c("versicolor", "versicolor", "versicolor", "virginica", "virginica",
"virginica", "virginica", "virginica", "virginica", "virginica")
expected <- c("versicolor", "versicolor", "versicolor", "versicolor", "versicolor",
"versicolor", "versicolor", "versicolor", "versicolor", "versicolor")
expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected)

# Test model save and load
17 changes: 9 additions & 8 deletions build/mvn
@@ -31,7 +31,7 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
install_app() {
local remote_tarball="$1/$2"
local remote_tarball="$1"
local local_tarball="${_DIR}/$2"
local binary="${_DIR}/$3"

@@ -71,19 +71,20 @@ install_mvn() {
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
fi
if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='}

local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
local MIRROR_URL="${APACHE_MIRROR}/${FILE_PATH}?action=download"

if [ $(command -v curl) ]; then
local TEST_MIRROR_URL="${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
if ! curl -L --output /dev/null --silent --head --fail "$TEST_MIRROR_URL" ; then
if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}" ; then
# Fall back to archive.apache.org for older Maven
echo "Falling back to archive.apache.org to download Maven"
APACHE_MIRROR="https://archive.apache.org/dist"
MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
fi
fi

install_app \
"${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \
"${MIRROR_URL}" \
"apache-maven-${MVN_VERSION}-bin.tar.gz" \
"apache-maven-${MVN_VERSION}/bin/mvn"

@@ -102,7 +103,7 @@ install_scala() {
local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}

install_app \
"${TYPESAFE_MIRROR}/scala/${scala_version}" \
"${TYPESAFE_MIRROR}/scala/${scala_version}/scala-${scala_version}.tgz" \
"scala-${scala_version}.tgz" \
"scala-${scala_version}/bin/scala"

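The reworked download logic in `install_mvn` probes the dynamic closer.lua mirror with a HEAD request and falls back to archive.apache.org, which retains releases after they rotate off the mirror network. Isolated, and with the version hard-coded purely for illustration, the pattern looks like this:

```bash
MVN_VERSION=3.6.3   # illustrative; build/mvn resolves the pinned version itself
FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
MIRROR_URL="https://www.apache.org/dyn/closer.lua/${FILE_PATH}?action=download"

# HEAD-check the mirror URL; --fail makes curl exit non-zero on HTTP errors.
if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}"; then
  echo "Falling back to archive.apache.org to download Maven"
  MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
fi

curl -fL -o "apache-maven-${MVN_VERSION}-bin.tar.gz" "${MIRROR_URL}"
```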
8 changes: 5 additions & 3 deletions conf/log4j.properties.template
@@ -22,10 +22,12 @@ log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=WARN
log4j.logger.org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver=WARN

# Settings to quiet third party logs that are too verbose
log4j.logger.org.sparkproject.jetty=WARN
47 changes: 42 additions & 5 deletions core/src/main/resources/org/apache/spark/ui/static/stagepage.js
@@ -43,6 +43,23 @@ $.extend( $.fn.dataTable.ext.type.order, {
a = ConvertDurationString( a );
b = ConvertDurationString( b );
return ((a < b) ? 1 : ((a > b) ? -1 : 0));
},

"size-pre": function (data) {
var floatValue = parseFloat(data)
return isNaN(floatValue) ? 0 : floatValue;
},

"size-asc": function (a, b) {
a = parseFloat(a);
b = parseFloat(b);
return ((a < b) ? -1 : ((a > b) ? 1 : 0));
},

"size-desc": function (a, b) {
a = parseFloat(a);
b = parseFloat(b);
return ((a < b) ? 1 : ((a > b) ? -1 : 0));
}
} );

@@ -562,10 +579,27 @@ $(document).ready(function () {
}
],
"columnDefs": [
{ "visible": false, "targets": 15 },
{ "visible": false, "targets": 16 },
{ "visible": false, "targets": 17 },
{ "visible": false, "targets": 18 }
// SPARK-35087: columns typed "size" hold strings of the form 'size / records'
// and should sort in numerical order rather than lexicographical order by default.
// Each `targets` value is a column id in the #summary-executor-table defined in
// stagespage-template.html; if the relative position of the columns in that
// table changes, adjust the column indices here accordingly.
// Input Size / Records
{"type": "size", "targets": 9},
// Output Size / Records
{"type": "size", "targets": 10},
// Shuffle Read Size / Records
{"type": "size", "targets": 11},
// Shuffle Write Size / Records
{"type": "size", "targets": 12},
// Peak JVM Memory OnHeap / OffHeap
{"visible": false, "targets": 15},
// Peak Execution Memory OnHeap / OffHeap
{"visible": false, "targets": 16},
// Peak Storage Memory OnHeap / OffHeap
{"visible": false, "targets": 17},
// Peak Pool Memory Direct / Mapped
{"visible": false, "targets": 18}
],
"deferRender": true,
"order": [[0, "asc"]],
@@ -746,7 +780,7 @@ $(document).ready(function () {
"paging": true,
"info": true,
"processing": true,
"lengthMenu": [[20, 40, 60, 100, totalTasksToShow], [20, 40, 60, 100, "All"]],
"lengthMenu": [[20, 40, 60, 100, -1], [20, 40, 60, 100, "All"]],
"orderMulti": false,
"bAutoWidth": false,
"ajax": {
@@ -762,6 +796,9 @@
data.numTasks = totalTasksToShow;
data.columnIndexToSort = columnIndexToSort;
data.columnNameToSort = columnNameToSort;
if (data.length === -1) {
data.length = totalTasksToShow;
}
},
"dataSrc": function (jsons) {
var jsonStr = JSON.stringify(jsons);
