From 4973dbdea71ab8e1ab6e0b89287af918109ad6a3 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 11 Mar 2014 18:10:12 -0700 Subject: [PATCH 01/20] Example build using pom reader. Note this requires a SNAPSHOT version that is published locally. --- project/SparkBuild.scala | 683 --------------------------------------- project/build.scala | 10 + project/plugins.sbt | 4 + 3 files changed, 14 insertions(+), 683 deletions(-) delete mode 100644 project/SparkBuild.scala create mode 100644 project/build.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala deleted file mode 100644 index 599714233c18f..0000000000000 --- a/project/SparkBuild.scala +++ /dev/null @@ -1,683 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import sbt._ -import sbt.Classpaths.publishTask -import sbt.Keys._ -import sbtassembly.Plugin._ -import AssemblyKeys._ -import scala.util.Properties -import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings} -import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact -import sbtunidoc.Plugin._ -import UnidocKeys._ - -import scala.collection.JavaConversions._ - -// For Sonatype publishing -// import com.jsuereth.pgp.sbtplugin.PgpKeys._ - -object SparkBuild extends Build { - val SPARK_VERSION = "1.1.0-SNAPSHOT" - val SPARK_VERSION_SHORT = SPARK_VERSION.replaceAll("-SNAPSHOT", "") - - // Hadoop version to build against. For example, "1.0.4" for Apache releases, or - // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set - // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN. - val DEFAULT_HADOOP_VERSION = "1.0.4" - - // Whether the Hadoop version to build against is 2.2.x, or a variant of it. This can be set - // through the SPARK_IS_NEW_HADOOP environment variable. - val DEFAULT_IS_NEW_HADOOP = false - - val DEFAULT_YARN = false - - val DEFAULT_HIVE = false - - // HBase version; set as appropriate. - val HBASE_VERSION = "0.94.6" - - // Target JVM version - val SCALAC_JVM_VERSION = "jvm-1.6" - val JAVAC_JVM_VERSION = "1.6" - - lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*) - - lazy val core = Project("core", file("core"), settings = coreSettings) - - /** Following project only exists to pull previous artifacts of Spark for generating - Mima ignores. 
For more information see: SPARK 2071 */ - lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings) - - def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef - - lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(replDependencies.map(a => a: sbt.ClasspathDep[sbt.ProjectReference]): _*) - - lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) - - lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core) - - lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core) - - lazy val catalyst = Project("catalyst", file("sql/catalyst"), settings = catalystSettings) dependsOn(core) - - lazy val sql = Project("sql", file("sql/core"), settings = sqlCoreSettings) dependsOn(core) dependsOn(catalyst % "compile->compile;test->test") - - lazy val hive = Project("hive", file("sql/hive"), settings = hiveSettings) dependsOn(sql) - - lazy val maybeHive: Seq[ClasspathDependency] = if (isHiveEnabled) Seq(hive) else Seq() - lazy val maybeHiveRef: Seq[ProjectReference] = if (isHiveEnabled) Seq(hive) else Seq() - - lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core) - - lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core) - - lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) - .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*) - - lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps") - lazy val assembleDeps = assembleDepsTask := { - println() - println("**** NOTE ****") - println("'sbt/sbt assemble-deps' is no longer supported.") - println("Instead create a normal assembly and:") - println(" export SPARK_PREPEND_CLASSES=1 (toggle on)") - println(" unset SPARK_PREPEND_CLASSES (toggle off)") - println() - } - - // A configuration to set an alternative publishLocalConfiguration - lazy val MavenCompile = config("m2r") extend(Compile) - lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") - val sparkHome = System.getProperty("user.dir") - - // Allows build configuration to be set through environment variables - lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) - lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match { - case None => { - val isNewHadoopVersion = "^2\\.[2-9]+".r.findFirstIn(hadoopVersion).isDefined - (isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP) - } - case Some(v) => v.toBoolean - } - - lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match { - case None => DEFAULT_YARN - case Some(v) => v.toBoolean - } - lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client" - val maybeAvro = if (hadoopVersion.startsWith("0.23.")) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq() - - lazy val isHiveEnabled = Properties.envOrNone("SPARK_HIVE") match { - case None => DEFAULT_HIVE - case Some(v) => v.toBoolean - } - - // Include Ganglia integration if the user has enabled Ganglia - // This is isolated from the normal build due to LGPL-licensed code in the library - lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined - lazy val gangliaProj = Project("spark-ganglia-lgpl", 
file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core) - val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq() - val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq() - - // Include the Java 8 project if the JVM version is 8+ - lazy val javaVersion = System.getProperty("java.specification.version") - lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble - val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]() - lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings). - dependsOn(core) dependsOn(streaming % "compile->compile;test->test") - - // Include the YARN project if the user has enabled YARN - lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) - lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core) - - lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq() - lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq() - - lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings) - .dependsOn(streaming % "compile->compile;test->test") - - lazy val externalKafka = Project("external-kafka", file("external/kafka"), settings = kafkaSettings) - .dependsOn(streaming % "compile->compile;test->test") - - lazy val externalFlume = Project("external-flume", file("external/flume"), settings = flumeSettings) - .dependsOn(streaming % "compile->compile;test->test") - - lazy val externalZeromq = Project("external-zeromq", file("external/zeromq"), settings = zeromqSettings) - .dependsOn(streaming % "compile->compile;test->test") - - lazy val externalMqtt = Project("external-mqtt", file("external/mqtt"), settings = mqttSettings) - .dependsOn(streaming % "compile->compile;test->test") - - lazy val allExternal = Seq[ClasspathDependency](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) - lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) - - lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, graphx, bagel, streaming, hive) dependsOn(allExternal: _*) - - // Everything except assembly, hive, tools, java8Tests and examples belong to packageProjects - lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeHiveRef ++ maybeGangliaRef - - lazy val allProjects = packageProjects ++ allExternalRefs ++ - Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests - - def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq( - organization := "org.apache.spark", - version := SPARK_VERSION, - scalaVersion := "2.10.4", - scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation", "-feature", - "-target:" + SCALAC_JVM_VERSION), - javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION), - unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, - retrieveManaged := true, - javaHome := Properties.envOrNone("JAVA_HOME").map(file), - // This is to add convenience of enabling 
sbt -Dsbt.offline=true for making the build offline. - offline := "true".equalsIgnoreCase(sys.props("sbt.offline")), - retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", - transitiveClassifiers in Scope.GlobalScope := Seq("sources"), - testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), - incOptions := incOptions.value.withNameHashing(true), - // Fork new JVMs for tests and set Java options for those - fork := true, - javaOptions in Test += "-Dspark.home=" + sparkHome, - javaOptions in Test += "-Dspark.testing=1", - javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true", - javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark").map { case (k,v) => s"-D$k=$v" }.toSeq, - javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g".split(" ").toSeq, - javaOptions += "-Xmx3g", - // Show full stack trace and duration in test cases. - testOptions in Test += Tests.Argument("-oDF"), - // Remove certain packages from Scaladoc - scalacOptions in (Compile, doc) := Seq( - "-groups", - "-skip-packages", Seq( - "akka", - "org.apache.spark.api.python", - "org.apache.spark.network", - "org.apache.spark.deploy", - "org.apache.spark.util.collection" - ).mkString(":"), - "-doc-title", "Spark " + SPARK_VERSION_SHORT + " ScalaDoc" - ), - - // Only allow one test at a time, even across projects, since they run in the same JVM - concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), - - resolvers ++= Seq( - // HTTPS is unavailable for Maven Central - "Maven Repository" at "http://repo.maven.apache.org/maven2", - "Apache Repository" at "https://repository.apache.org/content/repositories/releases", - "JBoss Repository" at "https://repository.jboss.org/nexus/content/repositories/releases/", - "MQTT Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/", - "Cloudera Repository" at "http://repository.cloudera.com/artifactory/cloudera-repos/", - "Pivotal Repository" at "http://repo.spring.io/libs-release/", - // For Sonatype publishing - // "sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", - // "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/", - // also check the local Maven repository ~/.m2 - Resolver.mavenLocal - ), - - publishMavenStyle := true, - - // useGpg in Global := true, - - pomExtra := ( - - org.apache - apache - 14 - - http://spark.apache.org/ - - - Apache 2.0 License - http://www.apache.org/licenses/LICENSE-2.0.html - repo - - - - scm:git:git@github.com:apache/spark.git - scm:git:git@github.com:apache/spark.git - - - - matei - Matei Zaharia - matei.zaharia@gmail.com - http://www.cs.berkeley.edu/~matei - Apache Software Foundation - http://spark.apache.org - - - - JIRA - https://issues.apache.org/jira/browse/SPARK - - ), - - /* - publishTo <<= version { (v: String) => - val nexus = "https://oss.sonatype.org/" - if (v.trim.endsWith("SNAPSHOT")) - Some("sonatype-snapshots" at nexus + "content/repositories/snapshots") - else - Some("sonatype-staging" at nexus + "service/local/staging/deploy/maven2") - }, - - */ - - libraryDependencies ++= Seq( - "io.netty" % "netty-all" % "4.0.17.Final", - "org.eclipse.jetty" % "jetty-server" % jettyVersion, - "org.eclipse.jetty" % "jetty-util" % jettyVersion, - "org.eclipse.jetty" % "jetty-plus" % jettyVersion, - "org.eclipse.jetty" % "jetty-security" % jettyVersion, - "org.scalatest" %% "scalatest" % "2.1.5" % "test", - "org.scalacheck" 
%% "scalacheck" % "1.11.3" % "test", - "com.novocode" % "junit-interface" % "0.10" % "test", - "org.easymock" % "easymockclassextension" % "3.1" % "test", - "org.mockito" % "mockito-all" % "1.9.0" % "test", - "junit" % "junit" % "4.10" % "test", - // Needed by cglib which is needed by easymock. - "asm" % "asm" % "3.3.1" % "test" - ), - - testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), - parallelExecution := true, - /* Workaround for issue #206 (fixed after SBT 0.11.0) */ - watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task, - const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) }, - - otherResolvers := Seq(Resolver.file("dotM2", file(Path.userHome + "/.m2/repository"))), - publishLocalConfiguration in MavenCompile <<= (packagedArtifacts, deliverLocal, ivyLoggingLevel) map { - (arts, _, level) => new PublishConfiguration(None, "dotM2", arts, Seq(), level) - }, - publishMavenStyle in MavenCompile := true, - publishLocal in MavenCompile <<= publishTask(publishLocalConfiguration in MavenCompile, deliverLocal), - publishLocalBoth <<= Seq(publishLocal in MavenCompile, publishLocal).dependOn - ) ++ net.virtualvoid.sbt.graph.Plugin.graphSettings ++ ScalaStyleSettings ++ genjavadocSettings - - val akkaVersion = "2.2.3-shaded-protobuf" - val chillVersion = "0.3.6" - val codahaleMetricsVersion = "3.0.0" - val jblasVersion = "1.2.3" - val jets3tVersion = if ("^2\\.[3-9]+".r.findFirstIn(hadoopVersion).isDefined) "0.9.0" else "0.7.1" - val jettyVersion = "8.1.14.v20131031" - val hiveVersion = "0.12.0" - val parquetVersion = "1.4.3" - val slf4jVersion = "1.7.5" - - val excludeJBossNetty = ExclusionRule(organization = "org.jboss.netty") - val excludeIONetty = ExclusionRule(organization = "io.netty") - val excludeEclipseJetty = ExclusionRule(organization = "org.eclipse.jetty") - val excludeAsm = ExclusionRule(organization = "org.ow2.asm") - val excludeOldAsm = ExclusionRule(organization = "asm") - val excludeCommonsLogging = ExclusionRule(organization = "commons-logging") - val excludeSLF4J = ExclusionRule(organization = "org.slf4j") - val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap") - val excludeHadoop = ExclusionRule(organization = "org.apache.hadoop") - val excludeCurator = ExclusionRule(organization = "org.apache.curator") - val excludePowermock = ExclusionRule(organization = "org.powermock") - val excludeFastutil = ExclusionRule(organization = "it.unimi.dsi") - val excludeJruby = ExclusionRule(organization = "org.jruby") - val excludeThrift = ExclusionRule(organization = "org.apache.thrift") - val excludeServletApi = ExclusionRule(organization = "javax.servlet", artifact = "servlet-api") - val excludeJUnit = ExclusionRule(organization = "junit") - - def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark", - version: String = "1.0.0", crossVersion: String = "2.10"): Option[sbt.ModuleID] = { - val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion - Some(organization % fullId % version) // the artifact to compare binary compatibility with - } - - def coreSettings = sharedSettings ++ Seq( - name := "spark-core", - libraryDependencies ++= Seq( - "com.google.guava" % "guava" % "14.0.1", - "org.apache.commons" % "commons-lang3" % "3.3.2", - "org.apache.commons" % "commons-math3" % "3.3" % "test", - "com.google.code.findbugs" % "jsr305" % "1.3.9", - "log4j" % "log4j" % "1.2.17", - "org.slf4j" % "slf4j-api" % slf4jVersion, - 
"org.slf4j" % "slf4j-log4j12" % slf4jVersion, - "org.slf4j" % "jul-to-slf4j" % slf4jVersion, - "org.slf4j" % "jcl-over-slf4j" % slf4jVersion, - "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407 - "com.ning" % "compress-lzf" % "1.0.0", - "org.xerial.snappy" % "snappy-java" % "1.0.5", - "org.spark-project.akka" %% "akka-remote" % akkaVersion, - "org.spark-project.akka" %% "akka-slf4j" % akkaVersion, - "org.spark-project.akka" %% "akka-testkit" % akkaVersion % "test", - "org.json4s" %% "json4s-jackson" % "3.2.6" excludeAll(excludeScalap), - "colt" % "colt" % "1.2.0", - "org.apache.mesos" % "mesos" % "0.18.1" classifier("shaded-protobuf") exclude("com.google.protobuf", "protobuf-java"), - "commons-net" % "commons-net" % "2.2", - "net.java.dev.jets3t" % "jets3t" % jets3tVersion excludeAll(excludeCommonsLogging), - "commons-codec" % "commons-codec" % "1.5", // Prevent jets3t from including the older version of commons-codec - "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeCommonsLogging, excludeSLF4J, excludeOldAsm, excludeServletApi), - "org.apache.curator" % "curator-recipes" % "2.4.0" excludeAll(excludeJBossNetty), - "com.codahale.metrics" % "metrics-core" % codahaleMetricsVersion, - "com.codahale.metrics" % "metrics-jvm" % codahaleMetricsVersion, - "com.codahale.metrics" % "metrics-json" % codahaleMetricsVersion, - "com.codahale.metrics" % "metrics-graphite" % codahaleMetricsVersion, - "com.twitter" %% "chill" % chillVersion excludeAll(excludeAsm), - "com.twitter" % "chill-java" % chillVersion excludeAll(excludeAsm), - "org.tachyonproject" % "tachyon" % "0.4.1-thrift" excludeAll(excludeHadoop, excludeCurator, excludeEclipseJetty, excludePowermock), - "com.clearspring.analytics" % "stream" % "2.7.0" excludeAll(excludeFastutil), // Only HyperLogLogPlus is used, which does not depend on fastutil. - "org.spark-project" % "pyrolite" % "2.0.1", - "net.sf.py4j" % "py4j" % "0.8.1" - ), - libraryDependencies ++= maybeAvro, - assembleDeps, - previousArtifact := sparkPreviousArtifact("spark-core") - ) - - // Create a colon-separate package list adding "org.apache.spark" in front of all of them, - // for easier specification of JavaDoc package groups - def packageList(names: String*): String = { - names.map(s => "org.apache.spark." 
+ s).mkString(":") - } - - def rootSettings = sharedSettings ++ scalaJavaUnidocSettings ++ Seq( - publish := {}, - - unidocProjectFilter in (ScalaUnidoc, unidoc) := - inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha), - unidocProjectFilter in (JavaUnidoc, unidoc) := - inAnyProject -- inProjects(repl, examples, bagel, graphx, catalyst, tools, yarn, yarnAlpha), - - // Skip class names containing $ and some internal packages in Javadocs - unidocAllSources in (JavaUnidoc, unidoc) := { - (unidocAllSources in (JavaUnidoc, unidoc)).value - .map(_.filterNot(_.getName.contains("$"))) - .map(_.filterNot(_.getCanonicalPath.contains("akka"))) - .map(_.filterNot(_.getCanonicalPath.contains("deploy"))) - .map(_.filterNot(_.getCanonicalPath.contains("network"))) - .map(_.filterNot(_.getCanonicalPath.contains("executor"))) - .map(_.filterNot(_.getCanonicalPath.contains("python"))) - .map(_.filterNot(_.getCanonicalPath.contains("collection"))) - }, - - // Javadoc options: create a window title, and group key packages on index page - javacOptions in doc := Seq( - "-windowtitle", "Spark " + SPARK_VERSION_SHORT + " JavaDoc", - "-public", - "-group", "Core Java API", packageList("api.java", "api.java.function"), - "-group", "Spark Streaming", packageList( - "streaming.api.java", "streaming.flume", "streaming.kafka", - "streaming.mqtt", "streaming.twitter", "streaming.zeromq" - ), - "-group", "MLlib", packageList( - "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg", - "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation", - "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration", - "mllib.tree.impurity", "mllib.tree.model", "mllib.util" - ), - "-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"), - "-noqualifier", "java.lang" - ) - ) - - def replSettings = sharedSettings ++ Seq( - name := "spark-repl", - libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v), - libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v), - libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v) - ) - - def examplesSettings = sharedSettings ++ Seq( - name := "spark-examples", - jarName in assembly <<= version map { - v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" }, - libraryDependencies ++= Seq( - "com.twitter" %% "algebird-core" % "0.1.11", - "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeIONetty, excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeJruby), - "org.apache.cassandra" % "cassandra-all" % "1.2.6" - exclude("com.google.guava", "guava") - exclude("com.googlecode.concurrentlinkedhashmap", "concurrentlinkedhashmap-lru") - exclude("com.ning","compress-lzf") - exclude("io.netty", "netty") - exclude("jline","jline") - exclude("org.apache.cassandra.deps", "avro") - excludeAll(excludeSLF4J, excludeIONetty), - "com.github.scopt" %% "scopt" % "3.2.0" - ) - ) ++ assemblySettings ++ extraAssemblySettings - - def toolsSettings = sharedSettings ++ Seq( - name := "spark-tools", - libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v), - libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v ) - ) ++ assemblySettings ++ extraAssemblySettings - - def graphxSettings = sharedSettings ++ Seq( - name := "spark-graphx", - previousArtifact := sparkPreviousArtifact("spark-graphx"), - libraryDependencies ++= Seq( - "org.jblas" % 
"jblas" % jblasVersion - ) - ) - - def bagelSettings = sharedSettings ++ Seq( - name := "spark-bagel", - previousArtifact := sparkPreviousArtifact("spark-bagel") - ) - - def mllibSettings = sharedSettings ++ Seq( - name := "spark-mllib", - previousArtifact := sparkPreviousArtifact("spark-mllib"), - libraryDependencies ++= Seq( - "org.jblas" % "jblas" % jblasVersion, - "org.scalanlp" %% "breeze" % "0.7" excludeAll(excludeJUnit) - ) - ) - - def catalystSettings = sharedSettings ++ Seq( - name := "catalyst", - // The mechanics of rewriting expression ids to compare trees in some test cases makes - // assumptions about the the expression ids being contiguous. Running tests in parallel breaks - // this non-deterministically. TODO: FIX THIS. - parallelExecution in Test := false, - libraryDependencies ++= Seq( - "com.typesafe" %% "scalalogging-slf4j" % "1.0.1" - ) - ) - - def sqlCoreSettings = sharedSettings ++ Seq( - name := "spark-sql", - libraryDependencies ++= Seq( - "com.twitter" % "parquet-column" % parquetVersion, - "com.twitter" % "parquet-hadoop" % parquetVersion, - "com.fasterxml.jackson.core" % "jackson-databind" % "2.3.0" // json4s-jackson 3.2.6 requires jackson-databind 2.3.0. - ), - initialCommands in console := - """ - |import org.apache.spark.sql.catalyst.analysis._ - |import org.apache.spark.sql.catalyst.dsl._ - |import org.apache.spark.sql.catalyst.errors._ - |import org.apache.spark.sql.catalyst.expressions._ - |import org.apache.spark.sql.catalyst.plans.logical._ - |import org.apache.spark.sql.catalyst.rules._ - |import org.apache.spark.sql.catalyst.types._ - |import org.apache.spark.sql.catalyst.util._ - |import org.apache.spark.sql.execution - |import org.apache.spark.sql.test.TestSQLContext._ - |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin - ) - - // Since we don't include hive in the main assembly this project also acts as an alternative - // assembly jar. - def hiveSettings = sharedSettings ++ Seq( - name := "spark-hive", - javaOptions += "-XX:MaxPermSize=1g", - libraryDependencies ++= Seq( - "org.spark-project.hive" % "hive-metastore" % hiveVersion, - "org.spark-project.hive" % "hive-exec" % hiveVersion excludeAll(excludeCommonsLogging), - "org.spark-project.hive" % "hive-serde" % hiveVersion - ), - // Multiple queries rely on the TestHive singleton. See comments there for more details. - parallelExecution in Test := false, - // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings - // only for this subproject. 
- scalacOptions <<= scalacOptions map { currentOpts: Seq[String] => - currentOpts.filterNot(_ == "-deprecation") - }, - initialCommands in console := - """ - |import org.apache.spark.sql.catalyst.analysis._ - |import org.apache.spark.sql.catalyst.dsl._ - |import org.apache.spark.sql.catalyst.errors._ - |import org.apache.spark.sql.catalyst.expressions._ - |import org.apache.spark.sql.catalyst.plans.logical._ - |import org.apache.spark.sql.catalyst.rules._ - |import org.apache.spark.sql.catalyst.types._ - |import org.apache.spark.sql.catalyst.util._ - |import org.apache.spark.sql.execution - |import org.apache.spark.sql.hive._ - |import org.apache.spark.sql.hive.test.TestHive._ - |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin - ) - - def streamingSettings = sharedSettings ++ Seq( - name := "spark-streaming", - previousArtifact := sparkPreviousArtifact("spark-streaming") - ) - - def yarnCommonSettings = sharedSettings ++ Seq( - unmanagedSourceDirectories in Compile <++= baseDirectory { base => - Seq( - base / "../common/src/main/scala" - ) - }, - - unmanagedSourceDirectories in Test <++= baseDirectory { base => - Seq( - base / "../common/src/test/scala" - ) - } - - ) ++ extraYarnSettings - - def yarnAlphaSettings = yarnCommonSettings ++ Seq( - name := "spark-yarn-alpha" - ) - - def yarnSettings = yarnCommonSettings ++ Seq( - name := "spark-yarn" - ) - - def gangliaSettings = sharedSettings ++ Seq( - name := "spark-ganglia-lgpl", - libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0" - ) - - def java8TestsSettings = sharedSettings ++ Seq( - name := "java8-tests", - javacOptions := Seq("-target", "1.8", "-source", "1.8"), - testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a") - ) - - // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain - // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). - def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq() - - def yarnEnabledSettings = Seq( - libraryDependencies ++= Seq( - // Exclude rule required for all ? 
- "org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm), - "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging), - "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging), - "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging), - "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeServletApi) - ) - ) - - def assemblyProjSettings = sharedSettings ++ Seq( - name := "spark-assembly", - jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" } - ) ++ assemblySettings ++ extraAssemblySettings - - def extraAssemblySettings() = Seq( - test in assembly := {}, - mergeStrategy in assembly := { - case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard - case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard - case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard - case "log4j.properties" => MergeStrategy.discard - case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines - case "reference.conf" => MergeStrategy.concat - case _ => MergeStrategy.first - } - ) - - def oldDepsSettings() = Defaults.defaultSettings ++ Seq( - name := "old-deps", - scalaVersion := "2.10.4", - retrieveManaged := true, - retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", - libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq", - "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter", - "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx", - "spark-core").map(sparkPreviousArtifact(_).get intransitive()) - ) - - def twitterSettings() = sharedSettings ++ Seq( - name := "spark-streaming-twitter", - previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"), - libraryDependencies ++= Seq( - "org.twitter4j" % "twitter4j-stream" % "3.0.3" - ) - ) - - def kafkaSettings() = sharedSettings ++ Seq( - name := "spark-streaming-kafka", - previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"), - libraryDependencies ++= Seq( - "com.github.sgroschupf" % "zkclient" % "0.1", - "org.apache.kafka" %% "kafka" % "0.8.0" - exclude("com.sun.jdmk", "jmxtools") - exclude("com.sun.jmx", "jmxri") - exclude("net.sf.jopt-simple", "jopt-simple") - excludeAll(excludeSLF4J) - ) - ) - - def flumeSettings() = sharedSettings ++ Seq( - name := "spark-streaming-flume", - previousArtifact := sparkPreviousArtifact("spark-streaming-flume"), - libraryDependencies ++= Seq( - "org.apache.flume" % "flume-ng-sdk" % "1.4.0" % "compile" excludeAll(excludeIONetty, excludeThrift) - ) - ) - - def zeromqSettings() = sharedSettings ++ Seq( - name := "spark-streaming-zeromq", - previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"), - libraryDependencies ++= Seq( - "org.spark-project.akka" %% "akka-zeromq" % akkaVersion - ) - ) - - def mqttSettings() = streamingSettings ++ Seq( - name := "spark-streaming-mqtt", - previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), - libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") - ) -} diff --git a/project/build.scala b/project/build.scala new file mode 
100644 index 0000000000000..ab846cd874818 --- /dev/null +++ b/project/build.scala @@ -0,0 +1,10 @@ +import sbt._ +import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} + +object MyBuild extends PomBuild { + override def settings = { + println("Settings accessed") // Temprorary debug statement + super.settings ++ Seq(SbtPomKeys.profiles := Seq("yarn")) + } +} + diff --git a/project/plugins.sbt b/project/plugins.sbt index 472819b9fb8ba..698c43d491411 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,3 +1,4 @@ +<<<<<<< HEAD scalaVersion := "2.10.4" resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns) @@ -24,3 +25,6 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0") addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.0") + +addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "1.0-SNAPSHOT") + From 36efa627d3f70fc35af8b6ec3898d277deb93fc6 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 17 Mar 2014 17:23:25 -0700 Subject: [PATCH 02/20] Set project name in pom files and added eclipse/intellij plugins. --- bagel/pom.xml | 3 +++ core/pom.xml | 3 +++ examples/pom.xml | 3 +++ external/flume/pom.xml | 3 +++ external/kafka/pom.xml | 3 +++ external/mqtt/pom.xml | 3 +++ external/twitter/pom.xml | 3 +++ external/zeromq/pom.xml | 3 +++ graphx/pom.xml | 3 +++ mllib/pom.xml | 3 +++ project/build.scala | 10 +++++++++- project/plugins.sbt | 5 +++++ repl/pom.xml | 1 + streaming/pom.xml | 3 +++ tools/pom.xml | 3 +++ 15 files changed, 51 insertions(+), 1 deletion(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index c8e39a415af28..5c9d1854488c9 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-bagel_2.10 + + spark-bagel + jar Spark Project Bagel http://spark.apache.org/ diff --git a/core/pom.xml b/core/pom.xml index 6abf8480d5da0..7e678bd0fc8a7 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-core_2.10 + + spark-core + jar Spark Project Core http://spark.apache.org/ diff --git a/examples/pom.xml b/examples/pom.xml index 4f6d7fdb87d47..a34894004f5df 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-examples_2.10 + + spark-examples + jar Spark Project Examples http://spark.apache.org/ diff --git a/external/flume/pom.xml b/external/flume/pom.xml index c1f581967777b..e5807f160a239 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming-flume_2.10 + + spark-streaming-flume + jar Spark Project External Flume http://spark.apache.org/ diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index d014a7aad0fca..2e8c8815f0833 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming-kafka_2.10 + + spark-streaming-kafka + jar Spark Project External Kafka http://spark.apache.org/ diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 4980208cba3b0..a4a6263b23f59 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming-mqtt_2.10 + + spark-streaming-mqtt + jar Spark Project External MQTT http://spark.apache.org/ diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index 7073bd4404d9c..4448a7881e0e9 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming-twitter_2.10 + + 
spark-streaming-twitter + jar Spark Project External Twitter http://spark.apache.org/ diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index cf306e0dca8bd..5e372ba132f25 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming-zeromq_2.10 + + spark-streaming-zeromq + jar Spark Project External ZeroMQ http://spark.apache.org/ diff --git a/graphx/pom.xml b/graphx/pom.xml index 7d5d83e7f3bb9..75204db45d8b2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-graphx_2.10 + + spark-graphx + jar Spark Project GraphX http://spark.apache.org/ diff --git a/mllib/pom.xml b/mllib/pom.xml index b622f96dd7901..b5685abcc530d 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-mllib_2.10 + + spark-mllib + jar Spark Project ML Library http://spark.apache.org/ diff --git a/project/build.scala b/project/build.scala index ab846cd874818..05550b12d4489 100644 --- a/project/build.scala +++ b/project/build.scala @@ -1,10 +1,18 @@ import sbt._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} +import net.virtualvoid.sbt.graph.Plugin.graphSettings + object MyBuild extends PomBuild { override def settings = { println("Settings accessed") // Temprorary debug statement - super.settings ++ Seq(SbtPomKeys.profiles := Seq("yarn")) + super.settings ++ Seq(SbtPomKeys.profiles := Seq()) + } + + override def projectDefinitions(baseDirectory: File): Seq[Project] = { + super.projectDefinitions(baseDirectory).map { x => + x.settings(graphSettings: _*) + } } } diff --git a/project/plugins.sbt b/project/plugins.sbt index 698c43d491411..6c36a71024879 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -28,3 +28,8 @@ addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.0") addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "1.0-SNAPSHOT") +addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") + +addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.4.0") + +addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") diff --git a/repl/pom.xml b/repl/pom.xml index 4a66408ef3d2d..dfcf0d01ed7b9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -32,6 +32,7 @@ http://spark.apache.org/ + spark-repl /usr/share/spark root diff --git a/streaming/pom.xml b/streaming/pom.xml index f506d6ce34a6f..b30e4fc2941d2 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -27,6 +27,9 @@ org.apache.spark spark-streaming_2.10 + + spark-streaming + jar Spark Project Streaming http://spark.apache.org/ diff --git a/tools/pom.xml b/tools/pom.xml index 79cd8551d0722..ca227c75d522f 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -26,6 +26,9 @@ org.apache.spark spark-tools_2.10 + + spark-tools + jar Spark Project Tools http://spark.apache.org/ From 96cea1fb59fd6194b44a2b141f238facb3063b8c Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 27 May 2014 11:35:50 +0530 Subject: [PATCH 03/20] SPARK-1776 Have Spark's SBT build read dependencies from Maven. 
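
A minimal sketch of the approach (illustrative only, not part of the patch; MinimalPomBuild is a placeholder name, and the APIs are those of the sbt-pom-reader plugin used elsewhere in this series): extending PomBuild lets sbt derive every sub-project and its dependencies from the existing Maven pom.xml files instead of redeclaring them in Scala, while SbtPomKeys.profiles controls which Maven profiles are applied when the poms are read.

    import sbt._
    import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys}

    object MinimalPomBuild extends PomBuild {
      // Activate the same profiles the Maven build would use; "yarn" is just
      // an example of a profile defined in Spark's root pom.
      override def settings =
        super.settings ++ Seq(SbtPomKeys.profiles := Seq("yarn"))
    }

The full SparkBuild added below builds on the same hook: it layers project-specific sbt settings on top of what the pom reader generates, and selects profiles from MAVEN_PROFILES (populated by the new -P handling in sbt/sbt-launch-lib.bash, e.g. sbt/sbt -Pyarn assembly) or from the legacy SPARK_* environment variables.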
--- assembly/pom.xml | 1 + bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 6 +- extras/spark-ganglia-lgpl/pom.xml | 6 +- graphx/pom.xml | 2 +- mllib/pom.xml | 2 +- project/MimaBuild.scala | 8 + project/SparkBuild.scala | 265 +++++++++++++++++++++++++ project/build.scala | 18 -- project/plugins.sbt | 3 - project/project/SparkPluginBuild.scala | 4 +- repl/pom.xml | 2 +- sbt/sbt | 1 + sbt/sbt-launch-lib.bash | 11 +- sql/catalyst/pom.xml | 3 + sql/core/pom.xml | 3 + sql/hive/pom.xml | 3 + streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/alpha/pom.xml | 3 + yarn/pom.xml | 3 + yarn/stable/pom.xml | 3 + 29 files changed, 329 insertions(+), 38 deletions(-) create mode 100644 project/SparkBuild.scala delete mode 100644 project/build.scala diff --git a/assembly/pom.xml b/assembly/pom.xml index 0c60b66c3daca..4f6aade133db7 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -32,6 +32,7 @@ pom + assembly scala-${scala.binary.version} spark-assembly-${project.version}-hadoop${hadoop.version}.jar ${project.build.directory}/${spark.jar.dir}/${spark.jar.basename} diff --git a/bagel/pom.xml b/bagel/pom.xml index 5c9d1854488c9..90c4b095bb611 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-bagel_2.10 - spark-bagel + bagel jar Spark Project Bagel diff --git a/core/pom.xml b/core/pom.xml index 7e678bd0fc8a7..4ed920a750fff 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-core_2.10 - spark-core + core jar Spark Project Core diff --git a/examples/pom.xml b/examples/pom.xml index a34894004f5df..bd1c387c2eb91 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-examples_2.10 - spark-examples + examples jar Spark Project Examples diff --git a/external/flume/pom.xml b/external/flume/pom.xml index e5807f160a239..61a6aff543aed 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming-flume_2.10 - spark-streaming-flume + streaming-flume jar Spark Project External Flume diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 2e8c8815f0833..4762c50685a93 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming-kafka_2.10 - spark-streaming-kafka + streaming-kafka jar Spark Project External Kafka diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index a4a6263b23f59..32c530e600ce0 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming-mqtt_2.10 - spark-streaming-mqtt + streaming-mqtt jar Spark Project External MQTT diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index 4448a7881e0e9..637adb0f00da0 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming-twitter_2.10 - spark-streaming-twitter + streaming-twitter jar Spark Project External Twitter diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index 5e372ba132f25..e4d758a04a4cd 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming-zeromq_2.10 - spark-streaming-zeromq + streaming-zeromq jar Spark Project External ZeroMQ diff --git a/extras/java8-tests/pom.xml b/extras/java8-tests/pom.xml index 
955ec1a8c3033..3eade411b38b7 100644 --- a/extras/java8-tests/pom.xml +++ b/extras/java8-tests/pom.xml @@ -28,7 +28,11 @@ java8-tests_2.10 pom Spark Project Java8 Tests POM - + + + java8-tests + + org.apache.spark diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml index 22ea330b4374d..a5b162a0482e4 100644 --- a/extras/spark-ganglia-lgpl/pom.xml +++ b/extras/spark-ganglia-lgpl/pom.xml @@ -29,7 +29,11 @@ spark-ganglia-lgpl_2.10 jar Spark Ganglia Integration - + + + ganglia-lgpl + + org.apache.spark diff --git a/graphx/pom.xml b/graphx/pom.xml index 75204db45d8b2..7e3bcf29dcfbc 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-graphx_2.10 - spark-graphx + graphx jar Spark Project GraphX diff --git a/mllib/pom.xml b/mllib/pom.xml index b5685abcc530d..87afd7ecf2dd4 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-mllib_2.10 - spark-mllib + mllib jar Spark Project ML Library diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index bb2d73741c3bf..e3d465d68836c 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -59,9 +59,17 @@ object MimaBuild { val defaultExcludes = Seq() // Read package-private excludes from file +<<<<<<< variant A val classExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-class-excludes") val memberExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-member-excludes") +>>>>>>> variant B + val excludeFilePath = base.getAbsolutePath + "/.generated-mima-excludes" + val excludeFile = file(excludeFilePath) +####### Ancestor + val excludeFilePath = (base.getAbsolutePath + "/.generated-mima-excludes") + val excludeFile = file(excludeFilePath) +======= end val ignoredClasses: Seq[String] = if (!classExcludeFilePath.exists()) { Seq() diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala new file mode 100644 index 0000000000000..73a7e606dc9d9 --- /dev/null +++ b/project/SparkBuild.scala @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import scala.util.Properties +import scala.collection.JavaConversions._ + +import sbt._ +import sbt.Keys._ +import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings} +import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} +import net.virtualvoid.sbt.graph.Plugin.graphSettings + +object BuildCommons { + + val sparkVersion = "1.0.0-SNAPSHOT" + + private val buildLocation = file(".").getAbsoluteFile.getParentFile + + val allProjects@Seq(bagel, catalyst, core, graphx, hive, mllib, repl, spark, sql, streaming, + streamingFlume, streamingKafka, streamingMqtt, streamingTwitter, streamingZeromq) = + Seq("bagel", "catalyst", "core", "graphx", "hive", "mllib", "repl", "spark", "sql", + "streaming", "streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter", + "streaming-zeromq").map(ProjectRef(buildLocation, _)) + + val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl) = + Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl").map(ProjectRef(buildLocation, _)) + + val assemblyProjects@Seq(assembly, examples, tools) = Seq("assembly", "examples", "tools") + .map(ProjectRef(buildLocation, _)) + + val sparkHome = buildLocation +} + +object SparkBuild extends PomBuild { + + import BuildCommons._ + import scala.collection.mutable.Map + + val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty + + def backwardCompatibility = { + import scala.collection.mutable + var profiles: mutable.Seq[String] = mutable.Seq.empty + if (Properties.envOrNone("SPARK_YARN").isDefined) profiles ++= Seq("yarn") + if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) profiles ++= Seq("spark-ganglia-lgpl") + if (Properties.envOrNone("SPARK_HIVE").isDefined) profiles ++= Seq("hive") + Properties.envOrNone("SPARK_HADOOP_VERSION") match { + case Some(v) => System.setProperty("hadoop.version", v) + case None => + } + profiles + } + + override val profiles = Properties.envOrNone("MAVEN_PROFILES") match { + case None => backwardCompatibility + // Rationale: If -P option exists no need to support backwardCompatibility. + case Some(v) => v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq + } + + override val userPropertiesMap = System.getProperties.toMap + + lazy val sharedSettings = graphSettings ++ ScalaStyleSettings ++ Seq ( + javaHome := Properties.envOrNone("JAVA_HOME").map(file), + incOptions := incOptions.value.withNameHashing(true), + publishMavenStyle := true + ) + + def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { + val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]()) + projectsMap += (projectRef.project -> (existingSettings ++ settings)) + } + + // Note ordering of these settings matter. 
+ /* Enable shared settings on all projects */ + allProjects ++ optionallyEnabledProjects ++ assemblyProjects foreach enable(sharedSettings) + + /* Enable tests settings for all projects except examples, assembly and tools */ + allProjects ++ optionallyEnabledProjects foreach enable(TestSettings.s) + + /* Enable Mima for all projects except spark, sql, hive, catalyst and repl */ + allProjects filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)) foreach (x => enable(MimaSettings.effectiveSetting(x))(x)) + + /* Enable Assembly for all assembly projects */ + assemblyProjects foreach enable(AssemblySettings.s) + + /* Enable unidoc only for the root spark project */ + Seq(spark) foreach enable (UnidocSettings.s) + + /* Hive console settings */ + Seq(hive) foreach enable (hiveSettings) + + lazy val hiveSettings = Seq( + + javaOptions += "-XX:MaxPermSize=1g", + // Multiple queries rely on the TestHive singleton. See comments there for more details. + parallelExecution in Test := false, + // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings + // only for this subproject. + scalacOptions <<= scalacOptions map { currentOpts: Seq[String] => + currentOpts.filterNot(_ == "-deprecation") + }, + initialCommands in console := + """ + |import org.apache.spark.sql.catalyst.analysis._ + |import org.apache.spark.sql.catalyst.dsl._ + |import org.apache.spark.sql.catalyst.errors._ + |import org.apache.spark.sql.catalyst.expressions._ + |import org.apache.spark.sql.catalyst.plans.logical._ + |import org.apache.spark.sql.catalyst.rules._ + |import org.apache.spark.sql.catalyst.types._ + |import org.apache.spark.sql.catalyst.util._ + |import org.apache.spark.sql.execution + |import org.apache.spark.sql.hive._ + |import org.apache.spark.sql.hive.test.TestHive._ + |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin + ) + + // TODO: move this to its upstream project. + override def projectDefinitions(baseDirectory: File): Seq[Project] = { + super.projectDefinitions(baseDirectory).map { x => + if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*) + else x.settings(Seq[Setting[_]](): _*) + } + } + +} + +object MimaSettings { + + import BuildCommons._ + import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact + + private lazy val s = MimaBuild.mimaSettings(sparkHome) + + def effectiveSetting(projectRef: ProjectRef) = { + val organization = "org.apache.spark" + val version = "0.9.0-incubating" + val fullId = "spark-" + projectRef.project + "_2.10" + s ++ Seq(previousArtifact := Some(organization % fullId % version)) + } +} + +object AssemblySettings { + import sbtassembly.Plugin._ + import AssemblyKeys._ + + lazy val s = assemblySettings ++ Seq( + test in assembly := {}, + jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" + + Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, // TODO: add proper default hadoop version. 
+ mergeStrategy in assembly := { + case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard + case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard + case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard + case "log4j.properties" => MergeStrategy.discard + case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines + case "reference.conf" => MergeStrategy.concat + case _ => MergeStrategy.first + } + ) + +} + +object UnidocSettings { + + import BuildCommons._ + import sbtunidoc.Plugin._ + import UnidocKeys._ + + // for easier specification of JavaDoc package groups + private def packageList(names: String*): String = { + names.map(s => "org.apache.spark." + s).mkString(":") + } + + lazy val s = scalaJavaUnidocSettings ++ Seq ( + publish := {}, + + unidocProjectFilter in(ScalaUnidoc, unidoc) := + inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha), + unidocProjectFilter in(JavaUnidoc, unidoc) := + inAnyProject -- inProjects(repl, bagel, graphx, examples, tools, catalyst, yarn, yarnAlpha), + + // Skip class names containing $ and some internal packages in Javadocs + unidocAllSources in (JavaUnidoc, unidoc) := { + (unidocAllSources in (JavaUnidoc, unidoc)).value + .map(_.filterNot(_.getName.contains("$"))) + .map(_.filterNot(_.getCanonicalPath.contains("akka"))) + .map(_.filterNot(_.getCanonicalPath.contains("deploy"))) + .map(_.filterNot(_.getCanonicalPath.contains("network"))) + .map(_.filterNot(_.getCanonicalPath.contains("executor"))) + .map(_.filterNot(_.getCanonicalPath.contains("python"))) + .map(_.filterNot(_.getCanonicalPath.contains("collection"))) + }, + // Remove certain packages from Scaladoc + scalacOptions in (Compile, doc) := Seq( + "-groups", + "-skip-packages", Seq( + "akka", + "org.apache.spark.api.python", + "org.apache.spark.network", + "org.apache.spark.deploy", + "org.apache.spark.util.collection" + ).mkString(":"), + "-doc-title", "Spark " + sparkVersion.replaceAll("-SNAPSHOT", "") + " ScalaDoc" + ), + + // Javadoc options: create a window title, and group key packages on index page + javacOptions in doc := Seq( + "-windowtitle", "Spark " + sparkVersion.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-public", + "-group", "Core Java API", packageList("api.java", "api.java.function"), + "-group", "Spark Streaming", packageList( + "streaming.api.java", "streaming.flume", "streaming.kafka", + "streaming.mqtt", "streaming.twitter", "streaming.zeromq" + ), + "-group", "MLlib", packageList( + "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg", + "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation", + "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration", + "mllib.tree.impurity", "mllib.tree.model", "mllib.util" + ), + "-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"), + "-noqualifier", "java.lang" + ) + ) +} + +object TestSettings { + + import BuildCommons._ + + lazy val s = Seq ( + // Fork new JVMs for tests and set Java options for those + fork := true, + javaOptions in Test += "-Dspark.home=" + sparkHome, + javaOptions in Test += "-Dspark.testing=1", + javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true", + javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark").map { case (k,v) => s"-D$k=$v" }.toSeq, + javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g".split(" ").toSeq, + 
javaOptions += "-Xmx3g", + + // Show full stack trace and duration in test cases. + testOptions in Test += Tests.Argument("-oDF"), + testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), + // Enable Junit testing. + libraryDependencies += "com.novocode" % "junit-interface" % "0.9" % "test", + // Only allow one test at a time, even across projects, since they run in the same JVM + parallelExecution in Test := false, + concurrentRestrictions in Global += Tags.limit(Tags.Test, 1)) + +} diff --git a/project/build.scala b/project/build.scala deleted file mode 100644 index 05550b12d4489..0000000000000 --- a/project/build.scala +++ /dev/null @@ -1,18 +0,0 @@ -import sbt._ -import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} -import net.virtualvoid.sbt.graph.Plugin.graphSettings - - -object MyBuild extends PomBuild { - override def settings = { - println("Settings accessed") // Temprorary debug statement - super.settings ++ Seq(SbtPomKeys.profiles := Seq()) - } - - override def projectDefinitions(baseDirectory: File): Seq[Project] = { - super.projectDefinitions(baseDirectory).map { x => - x.settings(graphSettings: _*) - } - } -} - diff --git a/project/plugins.sbt b/project/plugins.sbt index 6c36a71024879..3e49c659a51cc 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,4 +1,3 @@ -<<<<<<< HEAD scalaVersion := "2.10.4" resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns) @@ -26,8 +25,6 @@ addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0") addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.0") -addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "1.0-SNAPSHOT") - addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.4.0") diff --git a/project/project/SparkPluginBuild.scala b/project/project/SparkPluginBuild.scala index e9fba641eb8a1..3ef2d5451da0d 100644 --- a/project/project/SparkPluginBuild.scala +++ b/project/project/SparkPluginBuild.scala @@ -24,8 +24,10 @@ import sbt.Keys._ * becomes available for scalastyle sbt plugin. */ object SparkPluginDef extends Build { - lazy val root = Project("plugins", file(".")) dependsOn(sparkStyle) + lazy val root = Project("plugins", file(".")) dependsOn(sparkStyle, sbtPomReader) lazy val sparkStyle = Project("spark-style", file("spark-style"), settings = styleSettings) + lazy val sbtPomReader = uri("https://github.com/ScrapCodes/sbt-pom-reader.git") + // There is actually no need to publish this artifact. def styleSettings = Defaults.defaultSettings ++ Seq ( name := "spark-style", diff --git a/repl/pom.xml b/repl/pom.xml index dfcf0d01ed7b9..4ebb1b82f0e8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -32,7 +32,7 @@ http://spark.apache.org/ - spark-repl + repl /usr/share/spark root diff --git a/sbt/sbt b/sbt/sbt index 9de265bd07dcb..1b1aa1483a829 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -72,6 +72,7 @@ Usage: $script_name [options] -J-X pass option -X directly to the java runtime (-J is stripped) -S-X add -X to sbt's scalacOptions (-J is stripped) + -PmavenProfiles Enable a maven profile for the build. In the case of duplicated or conflicting options, the order above shows precedence: JAVA_OPTS lowest, command line options highest. 
diff --git a/sbt/sbt-launch-lib.bash b/sbt/sbt-launch-lib.bash index 64e40a88206be..857b62ffa229c 100755 --- a/sbt/sbt-launch-lib.bash +++ b/sbt/sbt-launch-lib.bash @@ -16,6 +16,7 @@ declare -a residual_args declare -a java_args declare -a scalac_args declare -a sbt_commands +declare -a maven_profiles if test -x "$JAVA_HOME/bin/java"; then echo -e "Using $JAVA_HOME as default JAVA_HOME." @@ -87,6 +88,13 @@ addJava () { dlog "[addJava] arg = '$1'" java_args=( "${java_args[@]}" "$1" ) } + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export MAVEN_PROFILES="${maven_profiles[@]}" +} + addSbt () { dlog "[addSbt] arg = '$1'" sbt_commands=( "${sbt_commands[@]}" "$1" ) @@ -141,7 +149,8 @@ process_args () { -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;; -D*) addJava "$1" && shift ;; - -J*) addJava "${1:2}" && shift ;; + -J*) addJava "${1:2}" && shift ;; + -P*) enableProfile "$1" && shift ;; *) addResidual "$1" && shift ;; esac done diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 01d7b569080ea..6decde3fcd62d 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -31,6 +31,9 @@ jar Spark Project Catalyst http://spark.apache.org/ + + catalyst + diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 8210fd1f210d1..c309c43804d97 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -31,6 +31,9 @@ jar Spark Project SQL http://spark.apache.org/ + + sql + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 5ede76e5c3904..c343f85465499 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -31,6 +31,9 @@ jar Spark Project Hive http://spark.apache.org/ + + hive + diff --git a/streaming/pom.xml b/streaming/pom.xml index b30e4fc2941d2..f60697ce745b7 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -28,7 +28,7 @@ org.apache.spark spark-streaming_2.10 - spark-streaming + streaming jar Spark Project Streaming diff --git a/tools/pom.xml b/tools/pom.xml index ca227c75d522f..c0ee8faa7a615 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -27,7 +27,7 @@ org.apache.spark spark-tools_2.10 - spark-tools + tools jar Spark Project Tools diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml index b8a631dd0bb3b..5b13a1f002d6e 100644 --- a/yarn/alpha/pom.xml +++ b/yarn/alpha/pom.xml @@ -23,6 +23,9 @@ 1.1.0-SNAPSHOT ../pom.xml + + yarn-alpha + org.apache.spark spark-yarn-alpha_2.10 diff --git a/yarn/pom.xml b/yarn/pom.xml index ef7066ef1fdfc..efb473aa1b261 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -28,6 +28,9 @@ yarn-parent_2.10 pom Spark Project YARN Parent POM + + yarn + diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml index 0931beb505508..ceaf9f9d71001 100644 --- a/yarn/stable/pom.xml +++ b/yarn/stable/pom.xml @@ -23,6 +23,9 @@ 1.1.0-SNAPSHOT ../pom.xml + + yarn-stable + org.apache.spark spark-yarn_2.10 From 9439ea345899ada30bdcbb11d258ee8e15286c4c Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 27 May 2014 11:54:22 +0530 Subject: [PATCH 04/20] Small fix to run-examples script. 
--- bin/run-example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/run-example b/bin/run-example index e7a5fe3914fbd..0c30c12f4cb4e 100755 --- a/bin/run-example +++ b/bin/run-example @@ -34,9 +34,9 @@ else fi if [ -f "$FWDIR/RELEASE" ]; then - export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar` -elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then - export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar` + export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples*hadoop*.jar` +elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples*hadoop*.jar ]; then + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples*hadoop*.jar` fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then From cf88758f664eaadf4588a412e614366768e690e3 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 27 May 2014 17:40:00 +0530 Subject: [PATCH 05/20] cleanup --- project/MimaBuild.scala | 13 ++++++--- project/SparkBuild.scala | 59 ++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 39 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index e3d465d68836c..980fa302d99d2 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -88,8 +88,13 @@ object MimaBuild { ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes } - def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( - previousArtifact := None, - binaryIssueFilters ++= ignoredABIProblems(sparkHome) - ) + def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { + val organization = "org.apache.spark" + val version = "0.9.0-incubating" + val fullId = "spark-" + projectRef.project + "_2.10" + mimaDefaultSettings ++ + Seq(previousArtifact := Some(organization % fullId % version), + binaryIssueFilters ++= ignoredABIProblems(sparkHome)) + } + } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 73a7e606dc9d9..45d10f4f7ca66 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -89,21 +89,34 @@ object SparkBuild extends PomBuild { allProjects ++ optionallyEnabledProjects ++ assemblyProjects foreach enable(sharedSettings) /* Enable tests settings for all projects except examples, assembly and tools */ - allProjects ++ optionallyEnabledProjects foreach enable(TestSettings.s) + allProjects ++ optionallyEnabledProjects foreach enable(TestSettings.settings) /* Enable Mima for all projects except spark, sql, hive, catalyst and repl */ - allProjects filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)) foreach (x => enable(MimaSettings.effectiveSetting(x))(x)) + allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)). + foreach (x => enable(MimaBuild.mimaSettings(sparkHome, x))(x)) /* Enable Assembly for all assembly projects */ - assemblyProjects foreach enable(AssemblySettings.s) + assemblyProjects foreach enable(Assembly.settings) /* Enable unidoc only for the root spark project */ - Seq(spark) foreach enable (UnidocSettings.s) + enable(Unidoc.settings)(spark) /* Hive console settings */ - Seq(hive) foreach enable (hiveSettings) + enable(Hive.settings)(hive) - lazy val hiveSettings = Seq( + // TODO: move this to its upstream project. 
+ override def projectDefinitions(baseDirectory: File): Seq[Project] = { + super.projectDefinitions(baseDirectory).map { x => + if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*) + else x.settings(Seq[Setting[_]](): _*) + } + } + +} + +object Hive { + + lazy val settings = Seq( javaOptions += "-XX:MaxPermSize=1g", // Multiple queries rely on the TestHive singleton. See comments there for more details. @@ -129,36 +142,13 @@ object SparkBuild extends PomBuild { |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin ) - // TODO: move this to its upstream project. - override def projectDefinitions(baseDirectory: File): Seq[Project] = { - super.projectDefinitions(baseDirectory).map { x => - if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*) - else x.settings(Seq[Setting[_]](): _*) - } - } - } -object MimaSettings { - - import BuildCommons._ - import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact - - private lazy val s = MimaBuild.mimaSettings(sparkHome) - - def effectiveSetting(projectRef: ProjectRef) = { - val organization = "org.apache.spark" - val version = "0.9.0-incubating" - val fullId = "spark-" + projectRef.project + "_2.10" - s ++ Seq(previousArtifact := Some(organization % fullId % version)) - } -} - -object AssemblySettings { +object Assembly { import sbtassembly.Plugin._ import AssemblyKeys._ - lazy val s = assemblySettings ++ Seq( + lazy val settings = assemblySettings ++ Seq( test in assembly := {}, jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" + Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, // TODO: add proper default hadoop version. @@ -175,7 +165,7 @@ object AssemblySettings { } -object UnidocSettings { +object Unidoc { import BuildCommons._ import sbtunidoc.Plugin._ @@ -186,7 +176,7 @@ object UnidocSettings { names.map(s => "org.apache.spark." + s).mkString(":") } - lazy val s = scalaJavaUnidocSettings ++ Seq ( + lazy val settings = scalaJavaUnidocSettings ++ Seq ( publish := {}, unidocProjectFilter in(ScalaUnidoc, unidoc) := @@ -240,10 +230,9 @@ object UnidocSettings { } object TestSettings { - import BuildCommons._ - lazy val s = Seq ( + lazy val settings = Seq ( // Fork new JVMs for tests and set Java options for those fork := true, javaOptions in Test += "-Dspark.home=" + sparkHome, From a2f5ae1593b7f00250007463156cbce39f21acb1 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 28 May 2014 20:29:18 +0530 Subject: [PATCH 06/20] Fixes a bug in dependencies. 
--- sql/hive/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index c343f85465499..f30ae28b81e06 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -51,6 +51,11 @@ hive-metastore ${hive.version} + + commons-httpclient + commons-httpclient + 3.1 + org.spark-project.hive hive-exec From a49c61b689f72e6ba67e5716b5292638714e6c1d Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 28 May 2014 21:59:39 +0530 Subject: [PATCH 07/20] Fix for tools jar --- bin/spark-class | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 60d9657c0ffcd..4eee745bacef5 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -110,9 +110,9 @@ export JAVA_OPTS TOOLS_DIR="$FWDIR"/tools SPARK_TOOLS_JAR="" -if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then +if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then # Use the JAR from the SBT build - export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar` + export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar` fi if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then # Use the JAR from the Maven build From dccc8acfb21dd226289da655c02e219170427fe6 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 30 May 2014 12:42:31 +0530 Subject: [PATCH 08/20] updated mima to check against 1.0 --- project/MimaBuild.scala | 2 +- project/SparkBuild.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 980fa302d99d2..8421a4c32eb61 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -90,7 +90,7 @@ object MimaBuild { def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { val organization = "org.apache.spark" - val version = "0.9.0-incubating" + val version = "1.0.0" val fullId = "spark-" + projectRef.project + "_2.10" mimaDefaultSettings ++ Seq(previousArtifact := Some(organization % fullId % version), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 45d10f4f7ca66..41bbc76829f56 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -26,7 +26,7 @@ import net.virtualvoid.sbt.graph.Plugin.graphSettings object BuildCommons { - val sparkVersion = "1.0.0-SNAPSHOT" + val sparkVersion = "1.1.0-SNAPSHOT" private val buildLocation = file(".").getAbsoluteFile.getParentFile From d0a02f2b367ebe7934c1aa34890b7e13385539f6 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 2 Jun 2014 12:14:58 +0530 Subject: [PATCH 09/20] Bumped up pom versions, Since the build now depends on pom it is better updated there. + general cleanups. --- project/MimaBuild.scala | 17 ++++------------- project/MimaExcludes.scala | 4 ++-- project/SparkBuild.scala | 31 +++++++++++++++---------------- project/plugins.sbt | 6 ------ 4 files changed, 21 insertions(+), 37 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 8421a4c32eb61..20808b8ea5038 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -21,7 +21,6 @@ import com.typesafe.tools.mima.core.MissingTypesProblem import com.typesafe.tools.mima.core.ProblemFilters._ import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings -import sbt._ object MimaBuild { @@ -53,23 +52,15 @@ object MimaBuild { excludePackage("org.apache.spark." 
+ packageName) } - def ignoredABIProblems(base: File) = { + def ignoredABIProblems(base: File, currentSparkVersion: String) = { // Excludes placed here will be used for all Spark versions val defaultExcludes = Seq() // Read package-private excludes from file -<<<<<<< variant A val classExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-class-excludes") val memberExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-member-excludes") ->>>>>>> variant B - val excludeFilePath = base.getAbsolutePath + "/.generated-mima-excludes" - val excludeFile = file(excludeFilePath) -####### Ancestor - val excludeFilePath = (base.getAbsolutePath + "/.generated-mima-excludes") - val excludeFile = file(excludeFilePath) -======= end val ignoredClasses: Seq[String] = if (!classExcludeFilePath.exists()) { Seq() @@ -90,11 +81,11 @@ object MimaBuild { def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { val organization = "org.apache.spark" - val version = "1.0.0" + val previousSparkVersion = "1.0.0" val fullId = "spark-" + projectRef.project + "_2.10" mimaDefaultSettings ++ - Seq(previousArtifact := Some(organization % fullId % version), - binaryIssueFilters ++= ignoredABIProblems(sparkHome)) + Seq(previousArtifact := Some(organization % fullId % previousSparkVersion), + binaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)) } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 1621833e124f5..44bc9dc5fb690 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -31,8 +31,8 @@ import com.typesafe.tools.mima.core._ * MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap") */ object MimaExcludes { - val excludes = - SparkBuild.SPARK_VERSION match { + def excludes(version: String) = + version match { case v if v.startsWith("1.1") => Seq( MimaBuild.excludeSparkPackage("deploy"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 41bbc76829f56..f060af4c22c27 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -25,8 +25,6 @@ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} import net.virtualvoid.sbt.graph.Plugin.graphSettings object BuildCommons { - - val sparkVersion = "1.1.0-SNAPSHOT" private val buildLocation = file(".").getAbsoluteFile.getParentFile @@ -195,22 +193,10 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("python"))) .map(_.filterNot(_.getCanonicalPath.contains("collection"))) }, - // Remove certain packages from Scaladoc - scalacOptions in (Compile, doc) := Seq( - "-groups", - "-skip-packages", Seq( - "akka", - "org.apache.spark.api.python", - "org.apache.spark.network", - "org.apache.spark.deploy", - "org.apache.spark.util.collection" - ).mkString(":"), - "-doc-title", "Spark " + sparkVersion.replaceAll("-SNAPSHOT", "") + " ScalaDoc" - ), // Javadoc options: create a window title, and group key packages on index page javacOptions in doc := Seq( - "-windowtitle", "Spark " + sparkVersion.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", "-public", "-group", "Core Java API", packageList("api.java", "api.java.function"), "-group", "Spark Streaming", packageList( @@ -249,6 +235,19 @@ object TestSettings { libraryDependencies += "com.novocode" % "junit-interface" % "0.9" % "test", // Only allow one test at a time, even across projects, since they run in the same JVM parallelExecution in Test := false, - concurrentRestrictions in Global += Tags.limit(Tags.Test, 
1)) + concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), + // Remove certain packages from Scaladoc + scalacOptions in (Compile, doc) := Seq( + "-groups", + "-skip-packages", Seq( + "akka", + "org.apache.spark.api.python", + "org.apache.spark.network", + "org.apache.spark.deploy", + "org.apache.spark.util.collection" + ).mkString(":"), + "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" + ) + ) } diff --git a/project/plugins.sbt b/project/plugins.sbt index 3e49c659a51cc..472819b9fb8ba 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -24,9 +24,3 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0") addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.0") - -addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") - -addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.4.0") - -addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") From 89b9777f8c31c21e768d5f95404f3a656f3ce48e Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 24 Jun 2014 13:41:29 +0530 Subject: [PATCH 10/20] Merge conflicts --- project/MimaBuild.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 20808b8ea5038..034ba6a7bf50f 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -15,6 +15,9 @@ * limitations under the License. */ +import sbt._ +import sbt.Keys.version + import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.MissingClassProblem import com.typesafe.tools.mima.core.MissingTypesProblem @@ -22,6 +25,7 @@ import com.typesafe.tools.mima.core.ProblemFilters._ import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings + object MimaBuild { def excludeMember(fullName: String) = Seq( @@ -76,7 +80,7 @@ object MimaBuild { } defaultExcludes ++ ignoredClasses.flatMap(excludeClass) ++ - ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes + ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion) } def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { From 446768eca198e4cd2369c39a97c42251b5b5e2cd Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 24 Jun 2014 14:57:43 +0530 Subject: [PATCH 11/20] minor fix --- dev/scalastyle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scalastyle b/dev/scalastyle index 0e8fd5cc8d64c..27962274bd60e 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -19,7 +19,7 @@ echo -e "q\n" | SPARK_HIVE=true sbt/sbt scalastyle > scalastyle.txt # Check style with YARN alpha built too -echo -e "q\n" | SPARK_HADOOP_VERSION=0.23.9 SPARK_YARN=true sbt/sbt yarn-alpha/scalastyle \ +echo -e "q\n" | sbt/sbt -Dhadoop.version=0.23.9 -Pyarn-alpha yarn-alpha/scalastyle \ >> scalastyle.txt # Check style with YARN built too echo -e "q\n" | SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt yarn/scalastyle \ From 65cf06c404f93f6170a10a4d54466b46f19905ee Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 25 Jun 2014 13:00:20 +0530 Subject: [PATCH 12/20] Servelet API jars mess up with the other servlet jars on the class path. 
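On the sbt side the same dependency hygiene can be expressed directly on a module with exclude(); a short illustrative fragment, assuming the pom.xml entries below are exclusions placed on the Hadoop/YARN dependencies (the coordinates here are examples only, not taken from this patch):

// Hypothetical build.sbt fragment: keep the servlet API that Hadoop/YARN pull
// in transitively off the compile classpath, the sbt analogue of excluding
// javax.servlet:servlet-api in pom.xml.
libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-api" % "2.2.0" exclude("javax.servlet", "servlet-api")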
--- pom.xml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pom.xml b/pom.xml index 05f76d566e9d1..9131763a67b80 100644 --- a/pom.xml +++ b/pom.xml @@ -535,6 +535,10 @@ org.mortbay.jetty servlet-api-2.5 + + javax.servlet + servlet-api + junit junit @@ -618,6 +622,10 @@ hadoop-yarn-api ${yarn.version} + + javax.servlet + servlet-api + asm asm @@ -641,6 +649,10 @@ hadoop-yarn-common ${yarn.version} + + javax.servlet + servlet-api + asm asm @@ -668,6 +680,10 @@ hadoop-yarn-server-web-proxy ${yarn.version} + + javax.servlet + servlet-api + asm asm @@ -695,6 +711,10 @@ hadoop-yarn-client ${yarn.version} + + javax.servlet + servlet-api + asm asm From 6af91acd1fb675bc69dfeb70dece8bd0120f5eea Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 25 Jun 2014 16:59:05 +0530 Subject: [PATCH 13/20] Ported oldDeps back. + fixes issues with prev commit. --- pom.xml | 12 ------------ project/SparkBuild.scala | 24 +++++++++++++++++++++++- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 9131763a67b80..0fd41f626b8a5 100644 --- a/pom.xml +++ b/pom.xml @@ -649,10 +649,6 @@ hadoop-yarn-common ${yarn.version} - - javax.servlet - servlet-api - asm asm @@ -680,10 +676,6 @@ hadoop-yarn-server-web-proxy ${yarn.version} - - javax.servlet - servlet-api - asm asm @@ -711,10 +703,6 @@ hadoop-yarn-client ${yarn.version} - - javax.servlet - servlet-api - asm asm diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f060af4c22c27..b740004328b10 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -74,9 +74,31 @@ object SparkBuild extends PomBuild { lazy val sharedSettings = graphSettings ++ ScalaStyleSettings ++ Seq ( javaHome := Properties.envOrNone("JAVA_HOME").map(file), incOptions := incOptions.value.withNameHashing(true), + retrieveManaged := true, + retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", publishMavenStyle := true ) + /** Following project only exists to pull previous artifacts of Spark for generating + Mima ignores. For more information see: SPARK 2071 */ + lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings) + + def versionArtifact(id: String): Option[sbt.ModuleID] = { + val fullId = id + "_2.10" + Some("org.apache.spark" % fullId % "1.0.0") + } + + def oldDepsSettings() = Defaults.defaultSettings ++ Seq( + name := "old-deps", + scalaVersion := "2.10.4", + retrieveManaged := true, + retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", + libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq", + "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter", + "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx", + "spark-core").map(versionArtifact(_).get intransitive()) + ) + def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]()) projectsMap += (projectRef.project -> (existingSettings ++ settings)) @@ -107,7 +129,7 @@ object SparkBuild extends PomBuild { super.projectDefinitions(baseDirectory).map { x => if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*) else x.settings(Seq[Setting[_]](): _*) - } + } ++ Seq[Project](oldDeps) } } From ac4312cc3bf93931d20750a37d1f1af723f2d268 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 3 Jul 2014 15:04:18 -0700 Subject: [PATCH 14/20] Revert "minor fix" This reverts commit 446768eca198e4cd2369c39a97c42251b5b5e2cd. 
--- dev/scalastyle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scalastyle b/dev/scalastyle index 27962274bd60e..0e8fd5cc8d64c 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -19,7 +19,7 @@ echo -e "q\n" | SPARK_HIVE=true sbt/sbt scalastyle > scalastyle.txt # Check style with YARN alpha built too -echo -e "q\n" | sbt/sbt -Dhadoop.version=0.23.9 -Pyarn-alpha yarn-alpha/scalastyle \ +echo -e "q\n" | SPARK_HADOOP_VERSION=0.23.9 SPARK_YARN=true sbt/sbt yarn-alpha/scalastyle \ >> scalastyle.txt # Check style with YARN built too echo -e "q\n" | SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt yarn/scalastyle \ From acab73d08a735edbd8237c1240322e69ed36cfcb Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 3 Jul 2014 15:48:21 -0700 Subject: [PATCH 15/20] Revert "Small fix to run-examples script." This reverts commit 9439ea345899ada30bdcbb11d258ee8e15286c4c. --- bin/run-example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/run-example b/bin/run-example index 0c30c12f4cb4e..e7a5fe3914fbd 100755 --- a/bin/run-example +++ b/bin/run-example @@ -34,9 +34,9 @@ else fi if [ -f "$FWDIR/RELEASE" ]; then - export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples*hadoop*.jar` -elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples*hadoop*.jar ]; then - export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples*hadoop*.jar` + export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar` +elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar` fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then From 72651ca9aedb0ec6de08c5e5d99f5ae39adeb70e Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 3 Jul 2014 15:48:55 -0700 Subject: [PATCH 16/20] Addresses code reivew comments. --- dev/run-tests | 4 +-- project/SparkBuild.scala | 55 ++++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index d9df020f7563c..edd17b53b3d8c 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -66,10 +66,10 @@ echo "=========================================================================" # (either resolution or compilation) prompts the user for input either q, r, # etc to quit or retry. This echo is there to make it not block. 
if [ -n "$_RUN_SQL_TESTS" ]; then - echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly test | \ + echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean package assembly/assembly test | \ grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including" else - echo -e "q\n" | sbt/sbt clean assembly test | \ + echo -e "q\n" | sbt/sbt clean package assembly/assembly test | \ grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including" fi diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b740004328b10..dbe17cb419c05 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -35,7 +35,8 @@ object BuildCommons { "streaming-zeromq").map(ProjectRef(buildLocation, _)) val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl) = - Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl").map(ProjectRef(buildLocation, _)) + Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl") + .map(ProjectRef(buildLocation, _)) val assemblyProjects@Seq(assembly, examples, tools) = Seq("assembly", "examples", "tools") .map(ProjectRef(buildLocation, _)) @@ -50,23 +51,47 @@ object SparkBuild extends PomBuild { val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty + // Provides compatibility for older versions of the Spark build def backwardCompatibility = { import scala.collection.mutable + var isAlphaYarn = false var profiles: mutable.Seq[String] = mutable.Seq.empty - if (Properties.envOrNone("SPARK_YARN").isDefined) profiles ++= Seq("yarn") - if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) profiles ++= Seq("spark-ganglia-lgpl") - if (Properties.envOrNone("SPARK_HIVE").isDefined) profiles ++= Seq("hive") + if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) { + println("NOTE: SPARK_GANGLIA_LGPL is deprecated, Use the -Pganglia-lgpl flag.") + profiles ++= Seq("spark-ganglia-lgpl") + } + if (Properties.envOrNone("SPARK_HIVE").isDefined) { + println("NOTE: SPARK_HIVE is deprecated, Use the -Phive flag.") + profiles ++= Seq("hive") + } Properties.envOrNone("SPARK_HADOOP_VERSION") match { - case Some(v) => System.setProperty("hadoop.version", v) + case Some(v) => + if (v.matches("0.23.*")) isAlphaYarn = true + println("NOTE: SPARK_HADOOP_VERSION is deprecated, please use -Dhadoop.version=" + v) + System.setProperty("hadoop.version", v) case None => } + if (Properties.envOrNone("SPARK_YARN").isDefined) { + if(isAlphaYarn) { + println("NOTE: SPARK_YARN is deprecated, Use the -Pyarn-alpha flag.") + profiles ++= Seq("yarn-alpha") + } + else { + println("NOTE: SPARK_YARN is deprecated, Use the -Pyarn flag.") + profiles ++= Seq("yarn") + } + } profiles } override val profiles = Properties.envOrNone("MAVEN_PROFILES") match { case None => backwardCompatibility // Rationale: If -P option exists no need to support backwardCompatibility. - case Some(v) => v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq + case Some(v) => + if (backwardCompatibility.nonEmpty) + println("Note: We ignore environment variables, when use of profile is detected in " + + "conjunction with environment variable.") + v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq } override val userPropertiesMap = System.getProperties.toMap @@ -106,17 +131,17 @@ object SparkBuild extends PomBuild { // Note ordering of these settings matter. 
/* Enable shared settings on all projects */ - allProjects ++ optionallyEnabledProjects ++ assemblyProjects foreach enable(sharedSettings) + (allProjects ++ optionallyEnabledProjects ++ assemblyProjects).foreach(enable(sharedSettings)) /* Enable tests settings for all projects except examples, assembly and tools */ - allProjects ++ optionallyEnabledProjects foreach enable(TestSettings.settings) + (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings)) - /* Enable Mima for all projects except spark, sql, hive, catalyst and repl */ - allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)). + /* Enable Mima for all projects except spark, hive, catalyst and repl */ + allProjects.filterNot(y => Seq(spark, hive, catalyst, repl).exists(x => x == y)). foreach (x => enable(MimaBuild.mimaSettings(sparkHome, x))(x)) /* Enable Assembly for all assembly projects */ - assemblyProjects foreach enable(Assembly.settings) + assemblyProjects.foreach(enable(Assembly.settings)) /* Enable unidoc only for the root spark project */ enable(Unidoc.settings)(spark) @@ -171,7 +196,7 @@ object Assembly { lazy val settings = assemblySettings ++ Seq( test in assembly := {}, jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" + - Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, // TODO: add proper default hadoop version. + Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, mergeStrategy in assembly := { case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard @@ -246,8 +271,10 @@ object TestSettings { javaOptions in Test += "-Dspark.home=" + sparkHome, javaOptions in Test += "-Dspark.testing=1", javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true", - javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark").map { case (k,v) => s"-D$k=$v" }.toSeq, - javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g".split(" ").toSeq, + javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark") + .map { case (k,v) => s"-D$k=$v" }.toSeq, + javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g" + .split(" ").toSeq, javaOptions += "-Xmx3g", // Show full stack trace and duration in test cases. From 4b8875ea5aa22256cad5c48a7172b4cf5c26ff2d Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 3 Jul 2014 15:57:11 -0700 Subject: [PATCH 17/20] Sbt assembly no longer builds tools by default. 
--- project/SparkBuild.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index dbe17cb419c05..e78adcc88f4aa 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -38,9 +38,11 @@ object BuildCommons { Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl") .map(ProjectRef(buildLocation, _)) - val assemblyProjects@Seq(assembly, examples, tools) = Seq("assembly", "examples", "tools") + val assemblyProjects@Seq(assembly, examples) = Seq("assembly", "examples") .map(ProjectRef(buildLocation, _)) + val tools = "tools" + val sparkHome = buildLocation } From fa6221d88c85b6fb1bf8437f0387ab8b5e2019b9 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 9 Jul 2014 14:11:03 +0530 Subject: [PATCH 18/20] Excluding sql from mima --- project/SparkBuild.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e78adcc88f4aa..dc9a5827ce210 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -138,8 +138,9 @@ object SparkBuild extends PomBuild { /* Enable tests settings for all projects except examples, assembly and tools */ (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings)) - /* Enable Mima for all projects except spark, hive, catalyst and repl */ - allProjects.filterNot(y => Seq(spark, hive, catalyst, repl).exists(x => x == y)). + /* Enable Mima for all projects except spark, hive, catalyst, sql and repl */ + // TODO: Add Sql to mima checks + allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)). foreach (x => enable(MimaBuild.mimaSettings(sparkHome, x))(x)) /* Enable Assembly for all assembly projects */ From 62b09bb9c9ad168b08608758245273d663a6140a Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 9 Jul 2014 14:19:19 +0530 Subject: [PATCH 19/20] Improvements. 
--- pom.xml | 2 +- project/SparkBuild.scala | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 0fd41f626b8a5..fa80707d0929c 100644 --- a/pom.xml +++ b/pom.xml @@ -110,7 +110,7 @@ UTF-8 1.6 - + spark 2.10.4 2.10 0.18.1 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index dc9a5827ce210..b55c50560bb93 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -59,11 +59,11 @@ object SparkBuild extends PomBuild { var isAlphaYarn = false var profiles: mutable.Seq[String] = mutable.Seq.empty if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) { - println("NOTE: SPARK_GANGLIA_LGPL is deprecated, Use the -Pganglia-lgpl flag.") + println("NOTE: SPARK_GANGLIA_LGPL is deprecated, please use -Pganglia-lgpl flag.") profiles ++= Seq("spark-ganglia-lgpl") } if (Properties.envOrNone("SPARK_HIVE").isDefined) { - println("NOTE: SPARK_HIVE is deprecated, Use the -Phive flag.") + println("NOTE: SPARK_HIVE is deprecated, please use -Phive flag.") profiles ++= Seq("hive") } Properties.envOrNone("SPARK_HADOOP_VERSION") match { @@ -75,11 +75,11 @@ object SparkBuild extends PomBuild { } if (Properties.envOrNone("SPARK_YARN").isDefined) { if(isAlphaYarn) { - println("NOTE: SPARK_YARN is deprecated, Use the -Pyarn-alpha flag.") + println("NOTE: SPARK_YARN is deprecated, please use -Pyarn-alpha flag.") profiles ++= Seq("yarn-alpha") } else { - println("NOTE: SPARK_YARN is deprecated, Use the -Pyarn flag.") + println("NOTE: SPARK_YARN is deprecated, please use -Pyarn flag.") profiles ++= Seq("yarn") } } From a8ac9516b7e1da5e83cd38a0dcc43c68fc748cf1 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 10 Jul 2014 13:36:34 +0530 Subject: [PATCH 20/20] Updated sbt version. --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index bcde13f4362a7..c12ef652adfcb 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=0.13.2 +sbt.version=0.13.5
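MimaBuild.mimaSettings, as revised in this series, derives each module's previous release from its sbt project id. A small standalone sketch of that naming rule, using a hypothetical PreviousArtifactSketch object and the values that appear in MimaBuild.scala:

// Standalone sketch of how MimaBuild.mimaSettings names the previously
// released artifact it compares against: "spark-" plus the sbt project id,
// the Scala binary suffix, and the last released Spark version.
object PreviousArtifactSketch {
  val organization = "org.apache.spark"
  val previousSparkVersion = "1.0.0"

  def previousArtifact(projectId: String): String =
    organization + ":spark-" + projectId + "_2.10:" + previousSparkVersion

  def main(args: Array[String]): Unit =
    Seq("core", "streaming", "mllib").foreach(id => println(previousArtifact(id)))
}

For the core project this yields org.apache.spark:spark-core_2.10:1.0.0, the same coordinates the oldDeps project resolves when generating MiMa ignores.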