From 348bb13016e9e960d8d768a457a2ca1bedc4d829 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 6 Aug 2019 14:43:15 -0500 Subject: [PATCH] Update Spark versions, update spark-xml version. Misc small tweaks. (#401) * Update Spark versions, update spark-xml version. Misc small tweaks. * Fix more occurrences of 0.5.0 --- .travis.yml | 9 ++------- README.md | 10 +++++----- build.sbt | 12 ++++++------ .../com/databricks/spark/xml/util/InferSchema.scala | 2 ++ 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index 29eb09ce..cc14de42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,17 +9,12 @@ matrix: - scala: - 2.11.12 env: - - TEST_SPARK_VERSION="2.2.2" - - MIMA="mimaReportBinaryIssues" - - scala: - - 2.11.12 - env: - - TEST_SPARK_VERSION="2.3.2" + - TEST_SPARK_VERSION="2.3.3" - MIMA="mimaReportBinaryIssues" - scala: - 2.12.8 env: - - TEST_SPARK_VERSION="2.4.0" + - TEST_SPARK_VERSION="2.4.3" - MIMA="" # restore when 2.12 build released script: - sbt -Dspark.testVersion=$TEST_SPARK_VERSION ++$TRAVIS_SCALA_VERSION clean scalastyle test:scalastyle $MIMA coverage test coverageReport diff --git a/README.md b/README.md index 6f308693..f4ef57f1 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ You can link against this library in your program at the following coordinates: ``` groupId: com.databricks artifactId: spark-xml_2.11 -version: 0.5.0 +version: 0.6.0 ``` ### Scala 2.12 @@ -31,7 +31,7 @@ version: 0.5.0 ``` groupId: com.databricks artifactId: spark-xml_2.12 -version: 0.5.0 +version: 0.6.0 ``` ## Using with Spark shell @@ -40,12 +40,12 @@ This package can be added to Spark using the `--packages` command line option. F ### Spark compiled with Scala 2.11 ``` -$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.11:0.5.0 +$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.11:0.6.0 ``` ### Spark compiled with Scala 2.12 ``` -$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.12:0.5.0 +$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.12:0.6.0 ``` ## Features @@ -354,7 +354,7 @@ which you may make direct use of as follows: ```scala import com.databricks.spark.xml.XmlInputFormat -import org.apache.spark.SparkContext; +import org.apache.spark.SparkContext import org.apache.hadoop.io.{LongWritable, Text} val sc: SparkContext = _ diff --git a/build.sbt b/build.sbt index eeebb80a..2ea119e2 100755 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "spark-xml" -version := "0.5.0" +version := "0.6.0" organization := "com.databricks" @@ -12,7 +12,7 @@ crossScalaVersions := Seq("2.11.12", "2.12.8") scalacOptions := Seq("-unchecked", "-deprecation") -sparkVersion := sys.props.get("spark.testVersion").getOrElse("2.4.0") +sparkVersion := sys.props.get("spark.testVersion").getOrElse("2.4.3") sparkComponents := Seq("core", "sql") @@ -22,7 +22,7 @@ autoScalaLibrary := false libraryDependencies ++= Seq( "commons-io" % "commons-io" % "2.6", "org.slf4j" % "slf4j-api" % "1.7.25" % Provided, - "org.scalatest" %% "scalatest" % "3.0.3" % Test, + "org.scalatest" %% "scalatest" % "3.0.8" % Test, "com.novocode" % "junit-interface" % "0.11" % Test, "org.apache.spark" %% "spark-core" % sparkVersion.value % Test, "org.apache.spark" %% "spark-sql" % sparkVersion.value % Test, @@ -68,8 +68,8 @@ publishTo := { credentials += Credentials( "Sonatype Nexus Repository Manager", "oss.sonatype.org", - sys.env.get("USERNAME").getOrElse(""), - sys.env.get("PASSWORD").getOrElse("")) + sys.env.getOrElse("USERNAME", ""), + sys.env.getOrElse("PASSWORD", "")) parallelExecution in Test := false @@ -79,7 +79,7 @@ test in assembly := {} // Prints JUnit tests in output testOptions in Test := Seq(Tests.Argument(TestFrameworks.JUnit, "-v")) -mimaPreviousArtifacts := Set("com.databricks" %% "spark-xml" % "0.4.1") +mimaPreviousArtifacts := Set("com.databricks" %% "spark-xml" % "0.5.0") val ignoredABIProblems = { import com.typesafe.tools.mima.core._ diff --git a/src/main/scala/com/databricks/spark/xml/util/InferSchema.scala b/src/main/scala/com/databricks/spark/xml/util/InferSchema.scala index 6abedde6..86284e51 100644 --- a/src/main/scala/com/databricks/spark/xml/util/InferSchema.scala +++ b/src/main/scala/com/databricks/spark/xml/util/InferSchema.scala @@ -21,6 +21,7 @@ import java.util.Comparator import javax.xml.stream._ import javax.xml.stream.events._ +import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.collection.Seq import scala.collection.mutable.ArrayBuffer @@ -141,6 +142,7 @@ private[xml] object InferSchema { } } + @tailrec private def inferField(parser: XMLEventReader, options: XmlOptions): DataType = { parser.peek match { case _: EndElement => NullType