From bb44edcabbaf41edaf13c7bcd6dd62b246db7140 Mon Sep 17 00:00:00 2001
From: Shardul Mahadik <smahadik@linkedin.com>
Date: Tue, 23 Feb 2021 01:37:53 -0800
Subject: [PATCH 1/4] ADD JAR with ivy coordinates should be compatible with
 Hive behavior

---
 .../apache/spark/util/DependencyUtils.scala   | 13 ++++----
 .../org/apache/spark/SparkContextSuite.scala  | 33 +++++++++++++------
 ...ql-ref-syntax-aux-resource-mgmt-add-jar.md |  2 +-
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
index 60e866a556796..9f3b4da6181f9 100644
--- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
@@ -59,8 +59,8 @@ private[spark] object DependencyUtils extends Logging {
    * @param uri Ivy URI need to be downloaded.
    * @return Tuple value of parameter `transitive` and `exclude` value.
    *
-   *         1. transitive: whether to download dependency jar of Ivy URI, default value is false
-   *            and this parameter value is case-sensitive. Invalid value will be treat as false.
+   *         1. transitive: whether to download dependency jar of Ivy URI, default value is true
+   *            and this parameter value is case-insensitive. Invalid value will be treat as false.
    *            Example: Input:  exclude=org.mortbay.jetty:jetty&transitive=true
    *            Output:  true
    *
@@ -72,7 +72,7 @@ private[spark] object DependencyUtils extends Logging {
   private def parseQueryParams(uri: URI): (Boolean, String) = {
     val uriQuery = uri.getQuery
     if (uriQuery == null) {
-      (false, "")
+      (true, "")
     } else {
       val mapTokens = uriQuery.split("&").map(_.split("="))
       if (mapTokens.exists(isInvalidQueryString)) {
@@ -81,14 +81,15 @@ private[spark] object DependencyUtils extends Logging {
       }
       val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1)
 
-      // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is false
+      // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is true
       val transitiveParams = groupedParams.get("transitive")
       if (transitiveParams.map(_.size).getOrElse(0) > 1) {
         logWarning("It's best to specify `transitive` parameter in ivy URI query only once." +
           " If there are multiple `transitive` parameter, we will select the last one")
       }
       val transitive =
-        transitiveParams.flatMap(_.takeRight(1).map(_._2 == "true").headOption).getOrElse(false)
+        transitiveParams.flatMap(_.takeRight(1).map(_._2.equalsIgnoreCase("true")).headOption)
+          .getOrElse(true)
 
       // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http)
       // in an Ivy URI. When download Ivy URI jar, Spark won't download transitive jar
@@ -125,7 +126,7 @@ private[spark] object DependencyUtils extends Logging {
    *              `parameter=value&parameter=value...`
    *            Note that currently Ivy URI query part support two parameters:
    *             1. transitive: whether to download dependent jars related to your Ivy URI.
-   *                transitive=false or `transitive=true`, if not set, the default value is false.
+   *                transitive=false or `transitive=true`, if not set, the default value is true.
    *             2. exclude: exclusion list when download Ivy URI jar and dependency jars.
    *                The `exclude` parameter content is a ',' separated `group:module` pair string :
    *                `exclude=group:module,group:module...`
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 7a4970e60e932..0ba2a030dac12 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -1035,13 +1035,10 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     }
   }
 
-  test("SPARK-33084: Add jar support Ivy URI -- default transitive = false") {
+  test("SPARK-33084: Add jar support Ivy URI -- default transitive = true") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
     sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0")
     assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
-    assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
-
-    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
     assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
   }
 
@@ -1083,6 +1080,22 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     }
   }
 
+  test("SPARK-34506: Add jar support Ivy URI -- transitive=false will not download " +
+    "dependency jars") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=false")
+    assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
+  test("SPARK-34506: Add jar support Ivy URI -- test exclude param when transitive unspecified") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?exclude=commons-lang:commons-lang")
+    assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
+    assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar")))
+    assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+  }
+
   test("SPARK-33084: Add jar support Ivy URI -- test exclude param when transitive=true") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
     sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" +
@@ -1131,24 +1144,24 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
 
   test("SPARK-33084: Add jar support Ivy URI -- test param key case sensitive") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
-    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?TRANSITIVE=true")
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=false")
     assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
     assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
 
-    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?TRANSITIVE=false")
     assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
     assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
   }
 
-  test("SPARK-33084: Add jar support Ivy URI -- test transitive value case sensitive") {
+  test("SPARK-33084: Add jar support Ivy URI -- test transitive value case insensitive") {
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]"))
-    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=TRUE")
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=FALSE")
     assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
     assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
 
-    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")
+    sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=false")
     assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar")))
-    assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
+    assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar")))
   }
 
   test("SPARK-34346: hadoop configuration priority for spark/hive/hadoop configs") {
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
index 6d31125fd612d..f1a7377bd6ff3 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
@@ -36,7 +36,7 @@ ADD JAR file_name
     The name of the JAR file to be added. It could be either on a local file system or a distributed file system or an Ivy URI.
     Apache Ivy is a popular dependency manager focusing on flexibility and simplicity. Now we support two parameter in URI query string:
 
-     * transitive: whether to download dependent jars related to your ivy URL. It is case-sensitive and only take last one if multiple transitive parameters are specified.
+     * transitive: whether to download dependent jars related to your ivy URL. The parameter name is case-sensitive, parameter value is case-insensitive and only take last one if multiple transitive parameters are specified.
      * exclude: exclusion list during downloading Ivy URI jar and dependent jars.
 
     User can write Ivy URI such as:

From 7089d52eb5922d7a465885fceccbd8efe447a867 Mon Sep 17 00:00:00 2001
From: Shardul Mahadik <smahadik@linkedin.com>
Date: Tue, 23 Feb 2021 04:10:49 -0800
Subject: [PATCH 2/4] Fix tests

---
 .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala   | 8 ++++----
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala  | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index e6a18c3894497..b85b64bad0240 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3726,13 +3726,13 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
   test("SPARK-33084: Add jar support Ivy URI in SQL") {
     val sc = spark.sparkContext
     val hiveVersion = "2.3.8"
-    // default transitive=false, only download specified jar
-    sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion")
+    // transitive=false, only download specified jar
+    sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion?transitive=false")
     assert(sc.listJars()
       .exists(_.contains(s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")))
 
-    // test download ivy URL jar return multiple jars
-    sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0?transitive=true")
+    // default transitive=true, test download ivy URL jar return multiple jars
+    sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0")
     assert(sc.listJars().exists(_.contains("scalajs-library_2.12")))
     assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12")))
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index d4bcba4128db9..903cee743dc1a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -1224,7 +1224,10 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
   test("SPARK-33084: Add jar support Ivy URI in SQL") {
     val testData = TestHive.getHiveFile("data/files/sample.json").toURI
     withTable("t") {
-      sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion")
+      // exclude org.pentaho:pentaho-aggdesigner-algorithm as this transitive dependency does
+      // not exist on mavencentral and hence cannot be found in the test environment
+      sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion" +
+        "?exclude=org.pentaho:pentaho-aggdesigner-algorithm")
       sql(
         """CREATE TABLE t(a string, b string)
           |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)

From c63b605ac65bd74c6647bf59b261e6412982e1e2 Mon Sep 17 00:00:00 2001
From: Shardul Mahadik <smahadik@linkedin.com>
Date: Tue, 23 Feb 2021 14:26:44 -0800
Subject: [PATCH 3/4] Fix tests [Take 2]

---
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala  | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 903cee743dc1a..87c2541dc7555 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -1224,10 +1224,12 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
   test("SPARK-33084: Add jar support Ivy URI in SQL") {
     val testData = TestHive.getHiveFile("data/files/sample.json").toURI
     withTable("t") {
-      // exclude org.pentaho:pentaho-aggdesigner-algorithm as this transitive dependency does
-      // not exist on mavencentral and hence cannot be found in the test environment
+      // hive-catalog-core has some transitive dependencies which dont exist on maven central
+      // and hence cannot be found in the test environment or are non-jar (.pom) which cause
+      // failures in tests. Use transitive=false as it should be good enough to test the Ivy
+      // support in Hive ADD JAR
       sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion" +
-        "?exclude=org.pentaho:pentaho-aggdesigner-algorithm")
+        "?transitive=false")
       sql(
         """CREATE TABLE t(a string, b string)
           |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)

From 61db81b54e595819d5c32f59bbdf2a1490b9fb8b Mon Sep 17 00:00:00 2001
From: Shardul Mahadik <smahadik@linkedin.com>
Date: Wed, 24 Feb 2021 13:06:30 -0800
Subject: [PATCH 4/4] Address PR comments

---
 .../src/main/scala/org/apache/spark/util/DependencyUtils.scala | 3 ++-
 docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md               | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
index 9f3b4da6181f9..f7135edd2129d 100644
--- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
@@ -60,7 +60,8 @@ private[spark] object DependencyUtils extends Logging {
    * @return Tuple value of parameter `transitive` and `exclude` value.
    *
    *         1. transitive: whether to download dependency jar of Ivy URI, default value is true
-   *            and this parameter value is case-insensitive. Invalid value will be treat as false.
+   *            and this parameter value is case-insensitive. This mimics Hive's behaviour for
+   *            parsing the transitive parameter. Invalid value will be treat as false.
    *            Example: Input:  exclude=org.mortbay.jetty:jetty&transitive=true
    *            Output:  true
    *
diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
index f1a7377bd6ff3..e5ac58ba8195f 100644
--- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
+++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md
@@ -36,7 +36,7 @@ ADD JAR file_name
     The name of the JAR file to be added. It could be either on a local file system or a distributed file system or an Ivy URI.
     Apache Ivy is a popular dependency manager focusing on flexibility and simplicity. Now we support two parameter in URI query string:
 
-     * transitive: whether to download dependent jars related to your ivy URL. The parameter name is case-sensitive, parameter value is case-insensitive and only take last one if multiple transitive parameters are specified.
+     * transitive: whether to download dependent jars related to your ivy URL. The parameter name is case-sensitive, and the parameter value is case-insensitive. If multiple transitive parameters are specified, the last one wins.
      * exclude: exclusion list during downloading Ivy URI jar and dependent jars.
 
     User can write Ivy URI such as: