69 changes: 0 additions & 69 deletions pom.xml
@@ -1996,75 +1996,6 @@
</exclusions>
</dependency>

<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-contrib</artifactId>
<version>${hive.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-exec</artifactId>
</exclusion>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-serde</artifactId>
</exclusion>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-shims</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive.group}.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
<version>${hive.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-exec</artifactId>
</exclusion>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-metastore</artifactId>
</exclusion>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-cli</artifactId>
</exclusion>
<exclusion>
<groupId>${hive.group}</groupId>
<artifactId>hive-common</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
9 changes: 0 additions & 9 deletions sql/hive-thriftserver/pom.xml
@@ -77,15 +77,6 @@
<groupId>${hive.group}</groupId>
<artifactId>hive-beeline</artifactId>
</dependency>
<!-- Explicit listing hive-contrib and hive-hcatalog-core. Otherwise the maven test fails. -->
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-contrib</artifactId>
</dependency>
<dependency>
<groupId>${hive.group}.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -27,12 +27,11 @@ import scala.concurrent.Promise
import scala.concurrent.duration._

import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.contrib.udf.example.UDFExampleFormat
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.test.HiveTestUtils
import org.apache.spark.sql.hive.test.HiveTestJars
import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer
import org.apache.spark.util.{ThreadUtils, Utils}

@@ -202,7 +201,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
}

test("Commands using SerDe provided in --jars") {
val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath

val dataFilePath =
Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt")
@@ -297,11 +296,12 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
}

test("Support hive.aux.jars.path") {
val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath
val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath
runCliWithin(
1.minute,
Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))(
s"CREATE TEMPORARY FUNCTION example_format AS '${classOf[UDFExampleFormat].getName}';" -> "",
"CREATE TEMPORARY FUNCTION example_format AS " +
"'org.apache.hadoop.hive.contrib.udf.example.UDFExampleFormat';" -> "",
"SELECT example_format('%o', 93);" -> "135"
)
}
@@ -319,7 +319,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {

test("SPARK-28840 test --jars and hive.aux.jars.path command") {
val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath
val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath
val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath
runCliWithin(
1.minute,
Seq("--jars", s"$jarFile", "--conf",
sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -43,7 +43,7 @@ import org.scalatest.BeforeAndAfterAll
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.HiveTestUtils
import org.apache.spark.sql.hive.test.HiveTestJars
import org.apache.spark.sql.internal.StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION
import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer
import org.apache.spark.util.{ThreadUtils, Utils}
Expand Down Expand Up @@ -492,7 +492,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
withMultipleConnectionJdbcStatement("smallKV", "addJar")(
{
statement =>
val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath

statement.executeQuery(s"ADD JAR $jarFile")
},
8 changes: 0 additions & 8 deletions sql/hive/pom.xml
@@ -103,14 +103,6 @@
<groupId>${hive.group}</groupId>
<artifactId>hive-metastore</artifactId>
</dependency>
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-contrib</artifactId>
</dependency>
<dependency>
<groupId>${hive.group}.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
</dependency>
<!--
<dependency>
<groupId>${hive.group}</groupId>
sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveContext}
import org.apache.spark.sql.hive.test.{HiveTestJars, TestHiveContext}
import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS
import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
import org.apache.spark.sql.types.{DecimalType, StructType}
@@ -111,8 +111,8 @@ class HiveSparkSubmitSuite
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA"))
val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB"))
val jar3 = HiveTestUtils.getHiveContribJar.getCanonicalPath
val jar4 = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
val jar3 = HiveTestJars.getHiveContribJar().getCanonicalPath
val jar4 = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath
val jarsString = Seq(jar1, jar2, jar3, jar4).map(j => j.toString).mkString(",")
val args = Seq(
"--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"),
@@ -321,7 +321,7 @@ class HiveSparkSubmitSuite
"--master", "local-cluster[2,1,1024]",
"--conf", "spark.ui.enabled=false",
"--conf", "spark.master.rest.enabled=false",
"--jars", HiveTestUtils.getHiveContribJar.getCanonicalPath,
"--jars", HiveTestJars.getHiveContribJar().getCanonicalPath,
unusedJar.toString)
runSparkSubmit(argsForCreateTable)

@@ -463,7 +463,7 @@ object TemporaryHiveUDFTest extends Logging {

// Load a Hive UDF from the jar.
logInfo("Registering a temporary Hive UDF provided in a jar.")
val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
val jar = HiveTestJars.getHiveContribJar().getCanonicalPath
hiveContext.sql(
s"""
|CREATE TEMPORARY FUNCTION example_max
@@ -501,7 +501,7 @@ object PermanentHiveUDFTest1 extends Logging {

// Load a Hive UDF from the jar.
logInfo("Registering a permanent Hive UDF provided in a jar.")
val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
val jar = HiveTestJars.getHiveContribJar().getCanonicalPath
hiveContext.sql(
s"""
|CREATE FUNCTION example_max
@@ -538,7 +538,7 @@ object PermanentHiveUDFTest2 extends Logging {
val hiveContext = new TestHiveContext(sc)
// Load a Hive UDF from the jar.
logInfo("Write the metadata of a permanent Hive UDF into metastore.")
val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
val jar = HiveTestJars.getHiveContribJar().getCanonicalPath
val function = CatalogFunction(
FunctionIdentifier("example_max"),
"org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax",
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec
import org.apache.spark.sql.hive._
import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHive}
import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
@@ -817,7 +817,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd

test("ADD JAR command 2") {
// this is a test case from mapjoin_addjar.q
val testJar = HiveTestUtils.getHiveHcatalogCoreJar.toURI
val testJar = HiveTestJars.getHiveHcatalogCoreJar().toURI
val testData = TestHive.getHiveFile("data/files/sample.json").toURI
sql(s"ADD JAR $testJar")
sql(
@@ -827,9 +827,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
sql("select * from src join t1 on src.key = t1.a")
sql("DROP TABLE t1")
assert(sql("list jars").
filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0)
filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count() > 0)
assert(sql("list jar").
filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0)
filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count() > 0)
val testJar2 = TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath
sql(s"ADD JAR $testJar2")
assert(sql(s"list jar $testJar").count() == 1)
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.command.LoadDataCommand
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveSingleton}
import org.apache.spark.sql.hive.test.{HiveTestJars, TestHiveSingleton}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE
import org.apache.spark.sql.test.SQLTestUtils
@@ -1106,7 +1106,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
override def run() {
// To make sure this test works, this jar should not be loaded in another place.
sql(
s"ADD JAR ${HiveTestUtils.getHiveContribJar.getCanonicalPath}")
s"ADD JAR ${HiveTestJars.getHiveContribJar().getCanonicalPath}")
try {
sql(
"""

This file was deleted (HiveTestUtils.scala, superseded by the HiveTestJars helper below).

sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -647,3 +647,24 @@ private[sql] class TestHiveSessionStateBuilder(

override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _)
}

private[hive] object HiveTestJars {
private val repository = SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.defaultValueString
Member Author: We can also verify that the default value is valid.
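A minimal sketch of such a check (a hypothetical test, not part of this PR; placed in any SparkFunSuite, it relies on the getHiveContribJar helper defined below, so a successful fetch implicitly validates the default repository value):

  test("default remote repository serves the built-in Hive test jars") {
    // If ADDITIONAL_REMOTE_REPOSITORIES' default value were invalid, this fetch would fail.
    val jar = HiveTestJars.getHiveContribJar()
    assert(jar.exists() && jar.length() > 0)
  }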

private val hiveTestJarsDir = Utils.createTempDir()

def getHiveContribJar(version: String = HiveUtils.builtinHiveVersion): File =
getJarFromUrl(s"${repository}org/apache/hive/hive-contrib/" +
s"$version/hive-contrib-$version.jar")
def getHiveHcatalogCoreJar(version: String = HiveUtils.builtinHiveVersion): File =
getJarFromUrl(s"${repository}org/apache/hive/hcatalog/hive-hcatalog-core/" +
s"$version/hive-hcatalog-core-$version.jar")

private def getJarFromUrl(urlString: String): File = {
val fileName = urlString.split("/").last
val targetFile = new File(hiveTestJarsDir, fileName)
if (!targetFile.exists()) {
Utils.doFetchFile(urlString, hiveTestJarsDir, fileName, new SparkConf, null, null)
}
targetFile
}
Member Author: cc @srowen @dongjoon-hyun @HyukjinKwon Do you think this is stable?

Member: I don't know how stable this approach is. While Sean or Dongjoon might have an idea, I would verify that --jars is still tested under this condition, and run the tests multiple times to show that it at least works in practice.

Member: I think this could be OK. I was really thinking of refactoring the logic that directly downloads the JAR, found in HiveExternalCatalogVersionsSuite.tryDownloadSpark. In retrospect, though, that logic is somewhat tied to downloading a release tarball, not a JAR.

If you just want single JARs, how about just constructing the URL for them on repository.apache.org and downloading directly? I don't think you have to resolve the artifact.

Member Author: Do you prefer this?

  private val repository = "https://repository.apache.org/content/repositories/releases/"
  private val hiveContribUrl = s"${repository}org/apache/hive/hive-contrib/" +
    s"${HiveUtils.builtinHiveVersion}/"
  private val hiveHcatalogCoreUrl = s"${repository}org/apache/hive/hcatalog/hive-hcatalog-core/" +
    s"${HiveUtils.builtinHiveVersion}/"

  def getHiveContribJar: File = getFileFromUrl(hiveContribUrl,
    s"hive-contrib-${HiveUtils.builtinHiveVersion}.jar")
  def getHiveHcatalogCoreJar: File = getFileFromUrl(hiveHcatalogCoreUrl,
    s"hive-hcatalog-core-${HiveUtils.builtinHiveVersion}.jar")

  private def getFileFromUrl(urlString: String, filename: String): File = {
    val hiveTestJars = new File("/tmp/test-spark/hiveTestJars")
    if (!hiveTestJars.exists()) {
      hiveTestJars.mkdirs()
    }
    val targetFile = new File(hiveTestJars, filename)
    if (!targetFile.exists() || !(targetFile.length() > 0)) {
      val conf = new SparkConf
      val securityManager = new org.apache.spark.SecurityManager(conf)
      val hadoopConf = new Configuration

      // propagate exceptions up to the caller of getFileFromUrl
      Utils.doFetchFile(urlString, hiveTestJars, filename, conf, securityManager, hadoopConf)
    }
    targetFile
  }

Member: Yes, that kind of thing. It could be a little simpler (e.g. there is no need for defs for getHiveContribJar et al., and I think you can pass a null conf/securityManager to download an HTTPS URL), but this seems simpler than trying to resolve the artifact.

Member Author: Done

Contributor:
  private def getJarFromUrl(urlString: String): File = {
    val fileName = urlString.split("/").last
    val targetFile = new File(hiveTestJarsDir, fileName)
    if (!targetFile.exists()) {
      Utils.doFetchFile(urlString, hiveTestJarsDir, fileName, new SparkConf, null, null)
    }
    targetFile
  }

I think this method may be incorrect. Why not change it to:

  private def getJarFromUrl(urlString: String): File = {
    val fileName = urlString.split("/").last
    Utils.doFetchFile(urlString, hiveTestJarsDir, fileName, new SparkConf, null, null)
  }

Because the targetFile and the return value of Utils.doFetchFile may be different.
In my testing:
targetFile:
/private/var/folders/pl/0j98k60s5zs7lz4kthvkn2dm3954vp/T/spark-c8c1a791-bef2-4e36-bd99-e03d89fd7d27/hive-contrib-1.2.1.jar
Utils.doFetchFile:
/private/var/folders/pl/0j98k60s5zs7lz4kthvkn2dm3954vp/T/spark-fdbe0ba4-a929-499a-9a29-55644bcd31b0/hive-contrib-1.2.1.jar

Member Author: Thank you @LantaoJin. My change is meant to avoid downloading these two files many times.

}
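
A minimal sketch reconciling both points in the thread above, under the names used in this PR (assuming, per the Contributor's comment, that Utils.doFetchFile returns the fetched File): keep the cache check to avoid repeated downloads, but return doFetchFile's result when a download actually happens.

  private def getJarFromUrl(urlString: String): File = {
    val fileName = urlString.split("/").last
    val targetFile = new File(hiveTestJarsDir, fileName)
    if (targetFile.exists()) {
      targetFile  // cached by an earlier call in this JVM, skip the download
    } else {
      // Hand back doFetchFile's result instead of assuming where the file landed.
      Utils.doFetchFile(urlString, hiveTestJarsDir, fileName, new SparkConf, null, null)
    }
  }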