-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Closed
Labels
engine:spark (Spark integration), priority:medium (Moderate impact; usability gaps)
Description
Hi Team,
I'm getting a java.lang.NoSuchMethodError exception when I try launching the Spark application in standalone mode.
Exception Trace:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.avro.Schema.createUnion([Lorg/apache/avro/Schema;)Lorg/apache/avro/Schema;
at org.apache.hudi.spark.org.apache.spark.sql.avro.SchemaConverters$.toAvroType(SchemaConverters.scala:185)
at org.apache.hudi.spark.org.apache.spark.sql.avro.SchemaConverters$$anonfun$5.apply(SchemaConverters.scala:176)
at org.apache.hudi.spark.org.apache.spark.sql.avro.SchemaConverters$$anonfun$5.apply(SchemaConverters.scala:174)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at org.apache.spark.sql.types.StructType.foreach(StructType.scala:99)
at org.apache.hudi.spark.org.apache.spark.sql.avro.SchemaConverters$.toAvroType(SchemaConverters.scala:174)
at org.apache.hudi.AvroConversionUtils$.convertStructTypeToAvroSchema(AvroConversionUtils.scala:52)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:139)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:225)
Below is my build.sbt file:
scalaVersion := "2.11.8"
libraryDependencies += ("org.apache.spark" % "spark-core_2.11" % "2.3.1"% "provided")
.exclude("org.apache.avro", "avro")
.exclude("org.apache.avro", "avro-ipc")
.exclude("org.apache.avro", "avro-mapred")
libraryDependencies += ("org.apache.spark" % "spark-sql_2.11" % "2.3.1" % "provided")
.exclude("org.apache.avro", "avro")
libraryDependencies += ("org.apache.hudi" % "hudi-spark-bundle_2.11" % "0.7.0")
libraryDependencies += "org.apache.spark" %% "spark-avro" % "2.4.4"
libraryDependencies += ("com.typesafe.play" %% "play-json" % "2.4.0-M3")
.exclude("org.slf4j", "slf4j-api")
.exclude("org.slf4j", "slf4j-log4j12")
.exclude("org.slf4j", "jcl-over-slf4j")
.exclude("io.netty", "netty-all")
Spark submit command:
spark-submit --master local --jars <base-dir>/avro-1.8.2.jar --deploy-mode client \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--class "com.explore.hudi.HudiServiceMainJob" ApacheHudiService.jar
Versions of the software installed on my system:
- Scala: 2.11
- Spark: spark-2.3.1-bin-hadoop2.7
Just to add, I'm trying to read a CSV extract and create a Hudi table out of it; below is my sample code that gets executed upon launching my Spark application.
sparkSession
.read
.csv(inputCsvAbsPath)
.map(row => LibraryCheckoutInfo(
bibNumber = row.getString(0),
itemBarcode = row.getString(1),
itemType = row.getString(2),
collection = row.getString(3),
callNumber = row.getString(4)))
.write
.format(AppConstants.SPARK_FORMAT_HUDI)
.options(QuickstartUtils.getQuickstartWriteConfigs)
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "bibNumber")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "itemType")
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "collection")
.option(HoodieWriteConfig.TABLE_NAME, hudiTableName)
.mode(SaveMode.Overwrite)
.save(hudiTableBasePath)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
engine:spark (Spark integration), priority:medium (Moderate impact; usability gaps)