From 283b57ce2dc735cefa39a2a6290f36debc7f962f Mon Sep 17 00:00:00 2001
From: James Baiera
Date: Mon, 2 Nov 2020 11:58:46 -0500
Subject: [PATCH] Cross compile Scala code using Gradle variants (#1521)

This PR introduces Gradle variants into the ES-Hadoop build in order to
compile and test Scala code across different Scala versions instead of
recursively invoking the build from within itself. This PR should allow us
to upgrade to Scala 2.12 and above as well as newer versions of Spark
without breaking release artifact contracts outside of a major release.

Also included is the removal of the shared core source directories for the
Spark integration. This shared source has been refactored into a core
project that is cross compiled for the supported Spark and Scala versions
used in the SQL integration.

# Conflicts:
# spark/sql-13/build.gradle
# spark/sql-20/build.gradle
---
 .../hadoop/gradle/BuildPlugin.groovy          | 844 ++++++++++++------
 .../fixture/ElasticsearchFixturePlugin.groovy |  17 +-
 .../gradle/scala/ScalaVariantPlugin.groovy    | 172 ----
 .../gradle/scala/SparkVariantPlugin.java      | 460 ++++
 .../gradle-plugins/scala.variants.properties  |   1 -
 .../gradle-plugins/spark.variants.properties  |   1 +
 dist/build.gradle                             |  39 +-
 gradle.properties                             |   6 +
 .../hadoop/hive/hive-date-mapping.json        |   9 +
 .../hive/hive-date-mappingresponse.json       |  11 +
 .../hadoop/hive/hive-date-source.json         |  26 +
 .../hive/hive-date-typeless-mapping.json      |   7 +
 licenses/protobuf-java-2.5.0.jar.sha1         |   1 +
 licenses/protobuf-java-LICENSE.txt            |  32 +
 licenses/protobuf-java-NOTICE.txt             |   0
 licenses/spark-catalyst_2.11-2.3.0.jar.sha1   |   1 -
 licenses/spark-catalyst_2.11-2.4.4.jar.sha1   |   1 +
 licenses/spark-core_2.11-2.3.0.jar.sha1       |   1 -
 licenses/spark-core_2.11-2.4.4.jar.sha1       |   1 +
 licenses/spark-sql_2.11-2.3.0.jar.sha1        |   1 -
 licenses/spark-sql_2.11-2.4.4.jar.sha1        |   1 +
 licenses/spark-streaming_2.11-2.3.0.jar.sha1  |   1 -
 licenses/spark-streaming_2.11-2.4.4.jar.sha1  |   1 +
 licenses/spark-yarn_2.11-2.3.0.jar.sha1       |   1 -
 licenses/spark-yarn_2.11-2.4.4.jar.sha1       |   1 +
 qa/kerberos/build.gradle                      |   7 +-
 settings.gradle                               |   4 +
 spark/core/build.gradle                       | 148 +++
 .../licenses/commons-logging-1.1.1.jar.sha1   |   1 +
 .../core/licenses/commons-logging-LICENSE.txt | 202 +++++
 .../core/licenses/commons-logging-NOTICE.txt  |   5 +
 spark/core/licenses/scala-LICENSE.txt         |  29 +
 spark/core/licenses/scala-NOTICE.txt          |  67 ++
 .../licenses/scala-library-2.11.12.jar.sha1   |   1 +
 .../licenses/scala-reflect-2.11.12.jar.sha1   |   1 +
 spark/core/licenses/spark-LICENSE.txt         | 202 +++++
 spark/core/licenses/spark-NOTICE.txt          |  28 +
 .../licenses/spark-core_2.11-2.4.4.jar.sha1   |   1 +
 .../AbstractHadoopBasicSparkTest.java         |   0
 .../integration/AbstractJavaEsSparkTest.java  |   0
 .../spark/integration/SparkSuite.java         |   0
 .../spark/integration/SparkUtils.java         |   0
 .../core/{ => src}/itest/resources/basic.json |   0
 .../{ => src}/itest/resources/simple.json     |   0
 .../integration/AbstractScalaEsSpark.scala    |   0
 .../spark/integration/SparkScalaSuite.java    |   0
 .../spark/serialization/Bean.java             |   0
 .../spark/cfg/SparkSettings.java              |   0
 .../spark/cfg/SparkSettingsManager.java       |   0
 .../elasticsearch/spark/cfg/package-info.java |   0
 .../org/elasticsearch/spark/package.scala     |   0
 .../spark/rdd/AbstractEsRDD.scala             |   0
 .../spark/rdd/AbstractEsRDDIterator.scala     |   0
 .../elasticsearch/spark/rdd/CompatUtils.java  |  47 +-
 .../spark/rdd/CompatibilityLevel.scala        |   0
 .../elasticsearch/spark/rdd/EsRDDWriter.scala |   0
 .../org/elasticsearch/spark/rdd/EsSpark.scala |   0
 .../elasticsearch/spark/rdd/JavaEsRDD.scala   |   0
 .../org/elasticsearch/spark/rdd/Metadata.java |   0
 .../elasticsearch/spark/rdd/ScalaEsRDD.scala  |   0
 .../spark/rdd/api/java/JavaEsSpark.scala      |   0
 .../spark/rdd/api/java/package-info.java      |   0
 .../spark/serialization/ReflectionUtils.scala |   0
 .../ScalaMapFieldExtractor.scala              |   0
 .../ScalaMetadataExtractor.scala              |   0
 .../serialization/ScalaValueReader.scala      |   0
 .../serialization/ScalaValueWriter.scala      |   0
 .../ScalaExtendedBooleanValueReaderTest.scala |   0
 .../spark/ScalaValueReaderTest.scala          |   0
 .../spark/cfg/SparkConfigTest.scala           |   0
 .../spark/serialization/Bean.java             |  51 ++
 .../ScalaReflectionUtilsTest.scala            |   0
 .../serialization/ScalaValueWriterTest.scala  |   0
 ...ScalaSerializationEventConverterTest.scala |   0
 .../serialization/testbeans/Contact.java      |   0
 .../serialization/testbeans/ContactBook.java  |   0
 spark/sql-13/build.gradle                     | 342 ++++---
 .../spark/integration/SparkUtils.java         |  40 +
 spark/sql-13/src/itest/resources/basic.json   |   1 +
 spark/sql-13/src/itest/resources/simple.json  |   1 +
 .../AbstractScalaEsScalaSparkStreaming.scala  |   4 +-
 .../spark/serialization/Bean.java             |  51 ++
 .../spark/serialization/Garbage.scala         |  24 +
 .../spark/serialization/ModuleCaseClass.scala |  24 +
 .../spark/serialization/Trip.scala            |  24 +
 .../spark/sql/ServiceLoadingTest.scala        |  39 +
 spark/sql-20/build.gradle                     | 357 ++++----
 .../licenses/protobuf-java-2.5.0.jar.sha1     |   1 +
 .../sql-20/licenses/protobuf-java-LICENSE.txt |  32 +
 .../sql-20/licenses/protobuf-java-NOTICE.txt  |   0
 .../spark-catalyst_2.11-2.3.0.jar.sha1        |   1 -
 .../spark-catalyst_2.11-2.4.4.jar.sha1        |   1 +
 .../licenses/spark-core_2.11-2.3.0.jar.sha1   |   1 -
 .../licenses/spark-core_2.11-2.4.4.jar.sha1   |   1 +
 .../licenses/spark-sql_2.11-2.3.0.jar.sha1    |   1 -
 .../licenses/spark-sql_2.11-2.4.4.jar.sha1    |   1 +
 .../spark-streaming_2.11-2.3.0.jar.sha1       |   1 -
 .../spark-streaming_2.11-2.4.4.jar.sha1       |   1 +
 .../licenses/spark-yarn_2.11-2.3.0.jar.sha1   |   1 -
 .../licenses/spark-yarn_2.11-2.4.4.jar.sha1   |   1 +
 .../spark/integration/SparkUtils.java         |  40 +
 spark/sql-20/src/itest/resources/basic.json   |   1 +
 spark/sql-20/src/itest/resources/simple.json  |   1 +
 .../AbstractScalaEsScalaSparkStreaming.scala  |   4 +-
 ...tractScalaEsSparkStructuredStreaming.scala |  21 +-
 .../spark/serialization/Bean.java             |  51 ++
 .../spark/serialization/Garbage.scala         |  24 +
 .../spark/serialization/ModuleCaseClass.scala |  24 +
 .../spark/serialization/Trip.scala            |  24 +
 .../spark/sql/ServiceLoadingTest.scala        |  39 +
 storm/build.gradle                            |  15 +
 111 files changed, 2749 insertions(+), 875 deletions(-)
 delete mode 100644 buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/scala/ScalaVariantPlugin.groovy
 create mode 100644 buildSrc/src/main/java/org/elasticsearch/hadoop/gradle/scala/SparkVariantPlugin.java
 delete mode 100644 buildSrc/src/main/resources/META-INF/gradle-plugins/scala.variants.properties
 create mode 100644 buildSrc/src/main/resources/META-INF/gradle-plugins/spark.variants.properties
 create mode 100644 hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mapping.json
 create mode 100644 hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mappingresponse.json
 create mode 100644 hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-source.json
 create mode 100644 hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-typeless-mapping.json
 create mode 100644 licenses/protobuf-java-2.5.0.jar.sha1
 create mode 100644 licenses/protobuf-java-LICENSE.txt
 create mode 100644 licenses/protobuf-java-NOTICE.txt
 delete mode 100644 licenses/spark-catalyst_2.11-2.3.0.jar.sha1
 create
mode 100644 licenses/spark-catalyst_2.11-2.4.4.jar.sha1 delete mode 100644 licenses/spark-core_2.11-2.3.0.jar.sha1 create mode 100644 licenses/spark-core_2.11-2.4.4.jar.sha1 delete mode 100644 licenses/spark-sql_2.11-2.3.0.jar.sha1 create mode 100644 licenses/spark-sql_2.11-2.4.4.jar.sha1 delete mode 100644 licenses/spark-streaming_2.11-2.3.0.jar.sha1 create mode 100644 licenses/spark-streaming_2.11-2.4.4.jar.sha1 delete mode 100644 licenses/spark-yarn_2.11-2.3.0.jar.sha1 create mode 100644 licenses/spark-yarn_2.11-2.4.4.jar.sha1 create mode 100644 spark/core/build.gradle create mode 100644 spark/core/licenses/commons-logging-1.1.1.jar.sha1 create mode 100644 spark/core/licenses/commons-logging-LICENSE.txt create mode 100644 spark/core/licenses/commons-logging-NOTICE.txt create mode 100644 spark/core/licenses/scala-LICENSE.txt create mode 100644 spark/core/licenses/scala-NOTICE.txt create mode 100644 spark/core/licenses/scala-library-2.11.12.jar.sha1 create mode 100644 spark/core/licenses/scala-reflect-2.11.12.jar.sha1 create mode 100644 spark/core/licenses/spark-LICENSE.txt create mode 100644 spark/core/licenses/spark-NOTICE.txt create mode 100644 spark/core/licenses/spark-core_2.11-2.4.4.jar.sha1 rename spark/core/{ => src}/itest/java/org/elasticsearch/spark/integration/AbstractHadoopBasicSparkTest.java (100%) rename spark/core/{ => src}/itest/java/org/elasticsearch/spark/integration/AbstractJavaEsSparkTest.java (100%) rename spark/core/{ => src}/itest/java/org/elasticsearch/spark/integration/SparkSuite.java (100%) rename spark/core/{ => src}/itest/java/org/elasticsearch/spark/integration/SparkUtils.java (100%) rename spark/core/{ => src}/itest/resources/basic.json (100%) rename spark/core/{ => src}/itest/resources/simple.json (100%) rename spark/core/{ => src}/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSpark.scala (100%) rename spark/core/{ => src}/itest/scala/org/elasticsearch/spark/integration/SparkScalaSuite.java (100%) rename spark/core/{test => src/itest}/scala/org/elasticsearch/spark/serialization/Bean.java (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/cfg/SparkSettings.java (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/cfg/SparkSettingsManager.java (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/cfg/package-info.java (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/package.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDD.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDDIterator.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java (69%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/CompatibilityLevel.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/EsRDDWriter.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/EsSpark.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/JavaEsRDD.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/Metadata.java (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/ScalaEsRDD.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/api/java/JavaEsSpark.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/rdd/api/java/package-info.java (100%) rename spark/core/{ => 
src}/main/scala/org/elasticsearch/spark/serialization/ReflectionUtils.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/serialization/ScalaMapFieldExtractor.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/serialization/ScalaMetadataExtractor.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/serialization/ScalaValueReader.scala (100%) rename spark/core/{ => src}/main/scala/org/elasticsearch/spark/serialization/ScalaValueWriter.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/ScalaExtendedBooleanValueReaderTest.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/ScalaValueReaderTest.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/cfg/SparkConfigTest.scala (100%) create mode 100644 spark/core/src/test/scala/org/elasticsearch/spark/serialization/Bean.java rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/serialization/ScalaReflectionUtilsTest.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/serialization/ScalaValueWriterTest.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/serialization/handler/write/imple/ScalaSerializationEventConverterTest.scala (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/serialization/testbeans/Contact.java (100%) rename spark/core/{ => src}/test/scala/org/elasticsearch/spark/serialization/testbeans/ContactBook.java (100%) create mode 100644 spark/sql-13/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java create mode 100644 spark/sql-13/src/itest/resources/basic.json create mode 100644 spark/sql-13/src/itest/resources/simple.json create mode 100644 spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java create mode 100644 spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala create mode 100644 spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala create mode 100644 spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala create mode 100644 spark/sql-13/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala create mode 100644 spark/sql-20/licenses/protobuf-java-2.5.0.jar.sha1 create mode 100644 spark/sql-20/licenses/protobuf-java-LICENSE.txt create mode 100644 spark/sql-20/licenses/protobuf-java-NOTICE.txt delete mode 100644 spark/sql-20/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 create mode 100644 spark/sql-20/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 delete mode 100644 spark/sql-20/licenses/spark-core_2.11-2.3.0.jar.sha1 create mode 100644 spark/sql-20/licenses/spark-core_2.11-2.4.4.jar.sha1 delete mode 100644 spark/sql-20/licenses/spark-sql_2.11-2.3.0.jar.sha1 create mode 100644 spark/sql-20/licenses/spark-sql_2.11-2.4.4.jar.sha1 delete mode 100644 spark/sql-20/licenses/spark-streaming_2.11-2.3.0.jar.sha1 create mode 100644 spark/sql-20/licenses/spark-streaming_2.11-2.4.4.jar.sha1 delete mode 100644 spark/sql-20/licenses/spark-yarn_2.11-2.3.0.jar.sha1 create mode 100644 spark/sql-20/licenses/spark-yarn_2.11-2.4.4.jar.sha1 create mode 100644 spark/sql-20/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java create mode 100644 spark/sql-20/src/itest/resources/basic.json create mode 100644 spark/sql-20/src/itest/resources/simple.json create mode 100644 spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java create mode 100644 
spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala create mode 100644 spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala create mode 100644 spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala create mode 100644 spark/sql-20/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/BuildPlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/BuildPlugin.groovy index d82502dfc..4ca51b0fd 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/BuildPlugin.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/BuildPlugin.groovy @@ -6,30 +6,35 @@ import org.elasticsearch.gradle.precommit.DependencyLicensesTask import org.elasticsearch.gradle.precommit.LicenseHeadersTask import org.elasticsearch.gradle.precommit.UpdateShasTask import org.elasticsearch.gradle.testclusters.StandaloneRestIntegTestTask +import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin import org.gradle.api.Plugin import org.gradle.api.Project import org.gradle.api.Task +import org.gradle.api.XmlProvider import org.gradle.api.artifacts.Configuration import org.gradle.api.artifacts.Dependency import org.gradle.api.artifacts.DependencyResolveDetails import org.gradle.api.artifacts.ModuleDependency import org.gradle.api.artifacts.ProjectDependency import org.gradle.api.artifacts.ResolutionStrategy -import org.gradle.api.artifacts.maven.MavenPom -import org.gradle.api.artifacts.maven.MavenResolver import org.gradle.api.attributes.LibraryElements import org.gradle.api.attributes.Usage +import org.gradle.api.component.SoftwareComponentFactory import org.gradle.api.file.CopySpec import org.gradle.api.file.FileCollection import org.gradle.api.java.archives.Manifest import org.gradle.api.plugins.JavaLibraryPlugin -import org.gradle.api.plugins.MavenPlugin -import org.gradle.api.plugins.MavenPluginConvention +import org.gradle.api.plugins.JavaPlugin import org.gradle.api.plugins.scala.ScalaPlugin +import org.gradle.api.provider.Provider +import org.gradle.api.publish.maven.MavenPom +import org.gradle.api.publish.maven.MavenPublication +import org.gradle.api.publish.maven.tasks.GenerateMavenPom +import org.gradle.api.publish.maven.tasks.PublishToMavenRepository +import org.gradle.api.tasks.Copy import org.gradle.api.tasks.SourceSet import org.gradle.api.tasks.SourceSetContainer import org.gradle.api.tasks.TaskProvider -import org.gradle.api.tasks.Upload import org.gradle.api.tasks.bundling.Jar import org.gradle.api.tasks.compile.JavaCompile import org.gradle.api.tasks.javadoc.Javadoc @@ -38,9 +43,25 @@ import org.gradle.external.javadoc.JavadocOutputLevel import org.gradle.external.javadoc.MinimalJavadocOptions import org.gradle.plugins.ide.eclipse.EclipsePlugin import org.gradle.plugins.ide.idea.IdeaPlugin +import org.w3c.dom.NodeList + +import javax.inject.Inject + +import static org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin.SparkVariantPluginExtension +import static org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin.SparkVariant class BuildPlugin implements Plugin { + public static final String SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME = "sharedTestImplementation" + public static final String SHARED_ITEST_IMPLEMENTATION_CONFIGURATION_NAME = "sharedItestImplementation" + + private final SoftwareComponentFactory softwareComponentFactory + + @Inject + BuildPlugin(SoftwareComponentFactory 
softwareComponentFactory) { + this.softwareComponentFactory = softwareComponentFactory + } + @Override void apply(Project project) { configurePlugins(project) @@ -68,9 +89,6 @@ class BuildPlugin implements Plugin { // IDE Support project.getPluginManager().apply(IdeaPlugin.class) project.getPluginManager().apply(EclipsePlugin.class) - - // Maven Support - project.getPluginManager().apply(MavenPlugin.class) } /** Return the configuration name used for finding transitive deps of the given dependency. */ @@ -78,61 +96,104 @@ class BuildPlugin implements Plugin { return "_transitive_${groupId}_${artifactId}_${version}" } + /** + * Applies a closure to all dependencies in a configuration (currently or in the future) that disables the + * resolution of transitive dependencies except for projects in the group org.elasticsearch. + * @param configuration to disable transitive dependencies on + */ + static void disableTransitiveDependencies(Project project, Configuration configuration) { + configuration.dependencies.all { Dependency dep -> + if (dep instanceof ModuleDependency && !(dep instanceof ProjectDependency) && dep.group.startsWith('org.elasticsearch') == false) { + dep.transitive = false + + // also create a configuration just for this dependency version, so that later + // we can determine which transitive dependencies it has + String depConfig = transitiveDepConfigName(dep.group, dep.name, dep.version) + if (project.configurations.findByName(depConfig) == null) { + project.configurations.create(depConfig) + project.dependencies.add(depConfig, "${dep.group}:${dep.name}:${dep.version}") + } + } + } + } + + private static Configuration createConfiguration(Project project, String configurationName, boolean canBeConsumed, boolean canBeResolved, + String usageAttribute) { + return createConfiguration(project, configurationName, canBeConsumed, canBeResolved, usageAttribute, null) + } + + private static Configuration createConfiguration(Project project, String configurationName, boolean canBeConsumed, boolean canBeResolved, + String usageAttribute, String libraryElements) { + Configuration configuration = project.configurations.create(configurationName) + configuration.canBeConsumed = canBeConsumed + configuration.canBeResolved = canBeResolved + configuration.attributes { + // Changing USAGE is required when working with Scala projects, otherwise the source dirs get pulled + // into incremental compilation analysis. 
+ attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, usageAttribute)) + if (libraryElements != null) { + attribute(LibraryElements.LIBRARY_ELEMENTS_ATTRIBUTE, project.objects.named(LibraryElements, libraryElements)) + } + } + return configuration + } + private static void configureConfigurations(Project project) { + // Create a configuration that will hold common test dependencies to be shared with all of a project's test sources, including variants if present + Configuration sharedTestImplementation = project.configurations.create(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME) + project.configurations.getByName(JavaPlugin.TEST_IMPLEMENTATION_CONFIGURATION_NAME).extendsFrom(sharedTestImplementation) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + Configuration variantTestImplementation = project.configurations.getByName(variant.configuration(SourceSet.TEST_SOURCE_SET_NAME, JavaPlugin.IMPLEMENTATION_CONFIGURATION_NAME)) + variantTestImplementation.extendsFrom(sharedTestImplementation) + } + } + if (project != project.rootProject) { // Set up avenues for sharing source files between projects in order to create embedded Javadocs // Import source configuration - Configuration sources = project.configurations.create("additionalSources") - sources.canBeConsumed = false - sources.canBeResolved = true - sources.attributes { - // Changing USAGE is required when working with Scala projects, otherwise the source dirs get pulled - // into incremental compilation analysis. - attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, 'java-source')) - attribute(LibraryElements.LIBRARY_ELEMENTS_ATTRIBUTE, project.objects.named(LibraryElements, 'sources')) - } - - // Export source configuration - Configuration sourceElements = project.configurations.create("sourceElements") - sourceElements.canBeConsumed = true - sourceElements.canBeResolved = false - sourceElements.extendsFrom(sources) - sourceElements.attributes { - // Changing USAGE is required when working with Scala projects, otherwise the source dirs get pulled - // into incremental compilation analysis. - attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, 'java-source')) - attribute(LibraryElements.LIBRARY_ELEMENTS_ATTRIBUTE, project.objects.named(LibraryElements, 'sources')) - } + Configuration additionalSources = createConfiguration(project, 'additionalSources', false, true, 'java-source', 'sources') + + // Export source configuration - different from 'sourcesElements' which contains sourceJars instead of source files + Configuration sourceElements = createConfiguration(project, 'sourceElements', true, false, 'java-source', 'sources') + sourceElements.extendsFrom(additionalSources) // Import javadoc sources - Configuration javadocSources = project.configurations.create("javadocSources") - javadocSources.canBeConsumed = false - javadocSources.canBeResolved = true - javadocSources.attributes { - // Changing USAGE is required when working with Scala projects, otherwise the source dirs get pulled - // into incremental compilation analysis. 
- attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, 'javadoc-source')) - attribute(LibraryElements.LIBRARY_ELEMENTS_ATTRIBUTE, project.objects.named(LibraryElements, 'sources')) - } - - // Export source configuration - Configuration javadocElements = project.configurations.create("javadocElements") - javadocElements.canBeConsumed = true - javadocElements.canBeResolved = false - javadocElements.extendsFrom(sources) - javadocElements.attributes { - // Changing USAGE is required when working with Scala projects, otherwise the source dirs get pulled - // into incremental compilation analysis. - attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, 'javadoc-source')) - attribute(LibraryElements.LIBRARY_ELEMENTS_ATTRIBUTE, project.objects.named(LibraryElements, 'sources')) - } + createConfiguration(project, 'javadocSources', false, true, 'javadoc-source', 'sources') + + // Export javadoc source configuration - different from 'javadocElements' which contains javadocJars instead of java source files used to generate javadocs + Configuration javadocSourceElements = createConfiguration(project, 'javadocSourceElements', true, false, 'javadoc-source', 'sources') + javadocSourceElements.extendsFrom(additionalSources) // Export configuration for archives that should be in the distribution - Configuration distElements = project.configurations.create('distElements') - distElements.canBeConsumed = true - distElements.canBeResolved = false - distElements.attributes { - attribute(Usage.USAGE_ATTRIBUTE, project.objects.named(Usage, 'packaging')) + // TODO: Should we ditch this in favor of just using the built in exporting configurations? all three artifact types have them now + createConfiguration(project, 'distElements', true, false, 'packaging') + + // Do the same for any variants if the project has them + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + Configuration vAdditionalSources = createConfiguration(project, variant.configuration('additionalSources'), false, true, 'java-source', 'sources') + + Configuration vSourceElements = createConfiguration(project, variant.configuration('sourceElements'), true, false, 'java-source', 'sources') + vSourceElements.extendsFrom(vAdditionalSources) + + createConfiguration(project, variant.configuration('javadocSources'), false, true, 'javadoc-source', 'sources') + + Configuration vJavadocSourceElements = createConfiguration(project, variant.configuration('javadocSourceElements'), true, false, 'javadoc-source', 'sources') + vJavadocSourceElements.extendsFrom(vAdditionalSources) + + createConfiguration(project, variant.configuration('distElements'), true, false, 'packaging') + } + sparkVariants.all { SparkVariant variant -> + // Set capabilities on ALL variants if variants are enabled. + // These are required to differentiate the different producing configurations from each other when resolving artifacts for consuming configurations. 
+ String variantCapability = variant.getCapabilityName(project.getVersion()) + project.configurations.getByName(variant.configuration('sourceElements')).getOutgoing().capability(variantCapability) + project.configurations.getByName(variant.configuration('javadocSourceElements')).getOutgoing().capability(variantCapability) + project.configurations.getByName(variant.configuration('distElements')).getOutgoing().capability(variantCapability) + } } } @@ -141,24 +202,20 @@ class BuildPlugin implements Plugin { } // force all dependencies added directly to compile/testCompile to be non-transitive, except for Elasticsearch projects - Closure disableTransitiveDeps = { Dependency dep -> - if (dep instanceof ModuleDependency && !(dep instanceof ProjectDependency) && dep.group.startsWith('org.elasticsearch') == false) { - dep.transitive = false - - // also create a configuration just for this dependency version, so that later - // we can determine which transitive dependencies it has - String depConfig = transitiveDepConfigName(dep.group, dep.name, dep.version) - if (project.configurations.findByName(depConfig) == null) { - project.configurations.create(depConfig) - project.dependencies.add(depConfig, "${dep.group}:${dep.name}:${dep.version}") - } + disableTransitiveDependencies(project, project.configurations.api) + disableTransitiveDependencies(project, project.configurations.implementation) + disableTransitiveDependencies(project, project.configurations.compileOnly) + disableTransitiveDependencies(project, project.configurations.runtimeOnly) + + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + disableTransitiveDependencies(project, project.getConfigurations().findByName(variant.configuration("api"))) + disableTransitiveDependencies(project, project.getConfigurations().findByName(variant.configuration("implementation"))) + disableTransitiveDependencies(project, project.getConfigurations().findByName(variant.configuration("compileOnly"))) + disableTransitiveDependencies(project, project.getConfigurations().findByName(variant.configuration("runtimeOnly"))) } } - - project.configurations.api.dependencies.all(disableTransitiveDeps) - project.configurations.implementation.dependencies.all(disableTransitiveDeps) - project.configurations.compileOnly.dependencies.all(disableTransitiveDeps) - project.configurations.runtimeOnly.dependencies.all(disableTransitiveDeps) } /** @@ -166,30 +223,58 @@ class BuildPlugin implements Plugin { * @param project to be configured */ private static void configureDependencies(Project project) { - // Create an itest source set, which will set up itest based configurations SourceSetContainer sourceSets = project.sourceSets as SourceSetContainer - sourceSets.create('itest') + SourceSet main = sourceSets.getByName('main') + + // Create an itest source set, just like the test source set + SourceSet itest = sourceSets.create('itest') + itest.setCompileClasspath(project.objects.fileCollection().from(main.getOutput(), project.getConfigurations().getByName('itestCompileClasspath'))) + itest.setRuntimeClasspath(project.objects.fileCollection().from(itest.getOutput(), main.getOutput(), project.getConfigurations().getByName('itestRuntimeClasspath'))) + + // Set configuration extension for itest: + // shared test <-- shared itest <-- itest + // test <-- itest + Configuration 
sharedItestImplementation = project.configurations.create(SHARED_ITEST_IMPLEMENTATION_CONFIGURATION_NAME) + Configuration sharedTestImplementation = project.configurations.getByName(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME) + Configuration testImplementation = project.configurations.getByName('testImplementation') + Configuration itestImplementation = project.configurations.getByName('itestImplementation') + sharedItestImplementation.extendsFrom(sharedTestImplementation) + itestImplementation.extendsFrom(sharedItestImplementation) + itestImplementation.extendsFrom(testImplementation) + + // Create an itest source set for each variant + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + SparkVariantPlugin.configureAdditionalVariantSourceSet(project, variant, 'itest') + + Configuration variantTestImplementation = project.configurations.getByName(variant.configuration('test', 'implementation')) + Configuration variantITestImplementation = project.configurations.getByName(variant.configuration('itest', 'implementation')) + variantITestImplementation.extendsFrom(sharedItestImplementation) + variantITestImplementation.extendsFrom(variantTestImplementation) + } + } // Detail all common dependencies project.dependencies { - testImplementation("junit:junit:${project.ext.junitVersion}") - testImplementation("org.hamcrest:hamcrest-all:${project.ext.hamcrestVersion}") - - testImplementation("joda-time:joda-time:2.8") - - testImplementation("org.slf4j:slf4j-log4j12:1.7.6") - testImplementation("org.apache.logging.log4j:log4j-api:${project.ext.log4jVersion}") - testImplementation("org.apache.logging.log4j:log4j-core:${project.ext.log4jVersion}") - testImplementation("org.apache.logging.log4j:log4j-1.2-api:${project.ext.log4jVersion}") - testImplementation("net.java.dev.jna:jna:4.2.2") - testImplementation("org.codehaus.groovy:groovy:${project.ext.groovyVersion}:indy") - testImplementation("org.locationtech.spatial4j:spatial4j:0.6") - testImplementation("com.vividsolutions:jts:1.13") - - itestImplementation(project.sourceSets.main.output) - itestImplementation(project.configurations.testImplementation) - itestImplementation(project.sourceSets.test.output) - itestImplementation(project.configurations.testRuntimeClasspath) + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "junit:junit:${project.ext.junitVersion}") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.hamcrest:hamcrest-all:${project.ext.hamcrestVersion}") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "joda-time:joda-time:2.8") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.slf4j:slf4j-log4j12:1.7.6") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.apache.logging.log4j:log4j-api:${project.ext.log4jVersion}") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.apache.logging.log4j:log4j-core:${project.ext.log4jVersion}") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.apache.logging.log4j:log4j-1.2-api:${project.ext.log4jVersion}") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "net.java.dev.jna:jna:4.2.2") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.codehaus.groovy:groovy:${project.ext.groovyVersion}:indy") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.locationtech.spatial4j:spatial4j:0.6") + add(SHARED_TEST_IMPLEMENTATION_CONFIGURATION_NAME, 
"com.vividsolutions:jts:1.13") + + // TODO: May not be needed on all itests + add(SHARED_ITEST_IMPLEMENTATION_CONFIGURATION_NAME, "org.apache.hadoop:hadoop-minikdc:${project.ext.minikdcVersion}") { + // For some reason, the dependencies that are pulled in with MiniKDC have multiple resource files + // that cause issues when they are loaded. We exclude the ldap schema data jar to get around this. + exclude group: "org.apache.directory.api", module: "api-ldap-schema-data" + } } // Deal with the messy conflicts out there @@ -244,7 +329,7 @@ class BuildPlugin implements Plugin { FileCollection javaSourceDirs = mainSourceSet.java.sourceDirectories javaSourceDirs.each { File srcDir -> project.getArtifacts().add('sourceElements', srcDir) - project.getArtifacts().add('javadocElements', srcDir) + project.getArtifacts().add('javadocSourceElements', srcDir) } // Add scala sources to source elements if that plugin is applied @@ -254,121 +339,204 @@ class BuildPlugin implements Plugin { project.getArtifacts().add('sourceElements', scalaSrcDir) } } + + // Do the same for any variants + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + SourceSet variantMainSourceSet = project.sourceSets.getByName(variant.getSourceSetName('main')) + + FileCollection variantJavaSourceDirs = variantMainSourceSet.java.sourceDirectories + variantJavaSourceDirs.each { File srcDir -> + project.getArtifacts().add(variant.configuration('sourceElements'), srcDir) + project.getArtifacts().add(variant.configuration('javadocSourceElements'), srcDir) + } + + FileCollection variantScalaSourceDirs = variantMainSourceSet.scala.sourceDirectories + variantScalaSourceDirs.each { File scalaSrcDir -> + project.getArtifacts().add(variant.configuration('sourceElements'), scalaSrcDir) + } + } + } } - JavaCompile compileJava = project.tasks.getByName('compileJava') as JavaCompile - compileJava.getOptions().setCompilerArgs(['-Xlint:unchecked', '-Xlint:options']) + project.tasks.withType(JavaCompile) { JavaCompile compile -> + compile.getOptions().setCompilerArgs(['-Xlint:unchecked', '-Xlint:options']) + } // Enable HTML test reports - Test testTask = project.tasks.getByName('test') as Test - testTask.getReports().getByName('html').setEnabled(true) + project.tasks.withType(Test) { Test testTask -> + testTask.getReports().getByName('html').setEnabled(true) + } // Configure project jar task with manifest and include license and notice data. 
- Jar jar = project.tasks.getByName('jar') as Jar - - Manifest manifest = jar.getManifest() - manifest.attributes["Created-By"] = "${System.getProperty("java.version")} (${System.getProperty("java.specification.vendor")})" - manifest.attributes['Implementation-Title'] = project.name - manifest.attributes['Implementation-Version'] = project.version - manifest.attributes['Implementation-URL'] = "https://github.com/elastic/elasticsearch-hadoop" - manifest.attributes['Implementation-Vendor'] = "Elastic" - manifest.attributes['Implementation-Vendor-Id'] = "org.elasticsearch.hadoop" - manifest.attributes['Repository-Revision'] = BuildParams.gitRevision - String build = System.env['ESHDP.BUILD'] - if (build != null) { - manifest.attributes['Build'] = build - } + project.tasks.withType(Jar) { Jar jar -> + Manifest manifest = jar.getManifest() + manifest.attributes["Created-By"] = "${System.getProperty("java.version")} (${System.getProperty("java.specification.vendor")})" + manifest.attributes['Implementation-Title'] = project.name + manifest.attributes['Implementation-Version'] = project.version + manifest.attributes['Implementation-URL'] = "https://github.com/elastic/elasticsearch-hadoop" + manifest.attributes['Implementation-Vendor'] = "Elastic" + manifest.attributes['Implementation-Vendor-Id'] = "org.elasticsearch.hadoop" + manifest.attributes['Repository-Revision'] = BuildParams.gitRevision + String build = System.env['ESHDP.BUILD'] + if (build != null) { + manifest.attributes['Build'] = build + } - jar.from("${project.rootDir}/docs/src/info") { CopySpec spec -> - spec.include("license.txt") - spec.include("notice.txt") - spec.into("META-INF") - spec.expand(copyright: new Date().format('yyyy'), version: project.version) + // TODO: Are these better to be set on just the jar or do these make sense to be on all jars (jar, javadoc, source)? + jar.from("${project.rootDir}/docs/src/info") { CopySpec spec -> + spec.include("license.txt") + spec.include("notice.txt") + spec.into("META-INF") + spec.expand(copyright: new Date().format('yyyy'), version: project.version) + } } if (project != project.rootProject) { - project.getArtifacts().add('distElements', jar) + project.getArtifacts().add('distElements', project.tasks.getByName('jar')) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + project.getArtifacts().add(variant.configuration('distElements'), project.tasks.getByName(variant.taskName('jar'))) + } + } } - // Jar up the sources of the project - Jar sourcesJar = project.tasks.create('sourcesJar', Jar) + // Creates jar tasks and producer configurations for javadocs and sources. + // Producer configurations (javadocElements and sourcesElements) contain javadoc and source JARS. 
This makes + // them more akin to distElements than the source code configurations (javadocSourceElements and sourceElements) + project.java { + withJavadocJar() + withSourcesJar() + } + Jar sourcesJar = project.tasks.getByName('sourcesJar') as Jar sourcesJar.dependsOn(project.tasks.classes) - sourcesJar.classifier = 'sources' - sourcesJar.from(project.sourceSets.main.allSource) // TODO: Remove when root project does not handle distribution if (project != project.rootProject) { sourcesJar.from(project.configurations.additionalSources) project.getArtifacts().add('distElements', sourcesJar) } + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + // Don't need to create sources jar task since it is already created by the variant plugin + Jar variantSourcesJar = project.tasks.getByName(variant.taskName('sourcesJar')) as Jar + variantSourcesJar.dependsOn(project.tasks.getByName(variant.taskName('classes'))) + variantSourcesJar.from(project.configurations.getByName(variant.configuration('additionalSources'))) + project.getArtifacts().add(variant.configuration('distElements'), variantSourcesJar) + } + } // Configure javadoc - Javadoc javadoc = project.tasks.getByName('javadoc') as Javadoc - javadoc.title = "${project.rootProject.description} ${project.version} API" - javadoc.excludes = [ - "org/elasticsearch/hadoop/mr/compat/**", - "org/elasticsearch/hadoop/rest/**", - "org/elasticsearch/hadoop/serialization/**", - "org/elasticsearch/hadoop/util/**", - "org/apache/hadoop/hive/**" - ] + project.tasks.withType(Javadoc) { Javadoc javadoc -> + javadoc.title = "${project.rootProject.description} ${project.version} API" + javadoc.excludes = [ + "org/elasticsearch/hadoop/mr/compat/**", + "org/elasticsearch/hadoop/rest/**", + "org/elasticsearch/hadoop/serialization/**", + "org/elasticsearch/hadoop/util/**", + "org/apache/hadoop/hive/**" + ] + // Set javadoc executable to runtime Java (1.8) + javadoc.executable = new File(project.ext.runtimeJavaHome, 'bin/javadoc') + + MinimalJavadocOptions javadocOptions = javadoc.getOptions() + javadocOptions.docFilesSubDirs = true + javadocOptions.outputLevel = JavadocOutputLevel.QUIET + javadocOptions.breakIterator = true + javadocOptions.author = false + javadocOptions.header = project.name + javadocOptions.showFromProtected() + javadocOptions.addStringOption('Xdoclint:none', '-quiet') + javadocOptions.groups = [ + 'Elasticsearch Map/Reduce' : ['org.elasticsearch.hadoop.mr*'], + 'Elasticsearch Hive' : ['org.elasticsearch.hadoop.hive*'], + 'Elasticsearch Pig' : ['org.elasticsearch.hadoop.pig*'], + 'Elasticsearch Spark' : ['org.elasticsearch.spark*'], + 'Elasticsearch Storm' : ['org.elasticsearch.storm*'], + ] + javadocOptions.links = [ // External doc links + "https://docs.oracle.com/javase/8/docs/api/", + "https://commons.apache.org/proper/commons-logging/apidocs/", + "https://hadoop.apache.org/docs/stable2/api/", + "https://pig.apache.org/docs/r0.15.0/api/", + "https://hive.apache.org/javadocs/r1.2.2/api/", + "https://spark.apache.org/docs/latest/api/java/", + "https://storm.apache.org/releases/current/javadocs/" + ] + } // TODO: Remove when root project does not handle distribution if (project != project.rootProject) { - javadoc.source += project.files(project.configurations.javadocSources) - } - // Set javadoc executable to runtime Java (1.8) - javadoc.executable = new 
File(project.ext.runtimeJavaHome, 'bin/javadoc') - - MinimalJavadocOptions javadocOptions = javadoc.getOptions() - javadocOptions.docFilesSubDirs = true - javadocOptions.outputLevel = JavadocOutputLevel.QUIET - javadocOptions.breakIterator = true - javadocOptions.author = false - javadocOptions.header = project.name - javadocOptions.showFromProtected() - javadocOptions.addStringOption('Xdoclint:none', '-quiet') - javadocOptions.groups = [ - 'Elasticsearch Map/Reduce' : ['org.elasticsearch.hadoop.mr*'], - 'Elasticsearch Hive' : ['org.elasticsearch.hadoop.hive*'], - 'Elasticsearch Pig' : ['org.elasticsearch.hadoop.pig*'], - 'Elasticsearch Spark' : ['org.elasticsearch.spark*'], - 'Elasticsearch Storm' : ['org.elasticsearch.storm*'], - ] - javadocOptions.links = [ // External doc links - "https://docs.oracle.com/javase/8/docs/api/", - "https://commons.apache.org/proper/commons-logging/apidocs/", - "https://hadoop.apache.org/docs/stable2/api/", - "https://pig.apache.org/docs/r0.15.0/api/", - "https://hive.apache.org/javadocs/r1.2.2/api/", - "https://spark.apache.org/docs/latest/api/java/", - "https://storm.apache.org/releases/current/javadocs/" - ] + Javadoc javadoc = project.tasks.getByName('javadoc') as Javadoc + javadoc.source(project.configurations.javadocSources) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVarients = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVarients.featureVariants { SparkVariant variant -> + Javadoc variantJavadoc = project.tasks.getByName(variant.taskName('javadoc')) as Javadoc + variantJavadoc.source(project.configurations.getByName(variant.configuration('javadocSources'))) + } + } + } // Package up the javadocs into their own jar - Jar javadocJar = project.tasks.create('javadocJar', Jar) - javadocJar.classifier = 'javadoc' - javadocJar.from(project.tasks.javadoc) + Jar javadocJar = project.tasks.getByName('javadocJar') as Jar if (project != project.rootProject) { project.getArtifacts().add('distElements', javadocJar) } + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + Jar variantJavadocJar = project.tasks.getByName(variant.taskName('javadocJar')) as Jar + project.getArtifacts().add(variant.configuration('distElements'), variantJavadocJar) + } + } // Task for creating ALL of a project's jars - Like assemble, but this includes the sourcesJar and javadocJar. + // TODO: Assemble is being configured to make javadoc and sources jars no matter what due to the withX() methods above. Is this even required in that case? + // The assemble task was previously configured to ignore javadoc and source tasks because they can be time consuming to generate when simply building the project. + // Probably better to just run them. 
Task pack = project.tasks.create('pack') pack.dependsOn(project.tasks.jar) - pack.dependsOn(javadocJar) - pack.dependsOn(sourcesJar) - pack.outputs.files(project.tasks.jar.archivePath, javadocJar.archivePath, sourcesJar.archivePath) + pack.dependsOn(project.tasks.javadocJar) + pack.dependsOn(project.tasks.sourcesJar) + pack.outputs.files(project.tasks.jar.archivePath, project.tasks.javadocJar.archivePath, project.tasks.sourcesJar.archivePath) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + pack.dependsOn(project.tasks.getByName(variant.taskName('jar'))) + pack.dependsOn(project.tasks.getByName(variant.taskName('javadocJar'))) + pack.dependsOn(project.tasks.getByName(variant.taskName('sourcesJar'))) + pack.outputs.files( + project.tasks.getByName(variant.taskName('jar')).archivePath, + project.tasks.getByName(variant.taskName('javadocJar')).archivePath, + project.tasks.getByName(variant.taskName('sourcesJar')).archivePath + ) + } + } // The distribution task is like assemble, but packages up a lot of extra jars and performs extra tasks that // are mostly used for snapshots and releases. Task distribution = project.tasks.create('distribution') distribution.dependsOn(pack) // Co-locate all build artifacts into distributions subdir for easier build automation - distribution.doLast { - project.copy { CopySpec spec -> - spec.from(jar.archivePath) - spec.from(javadocJar.archivePath) - spec.from(sourcesJar.archivePath) - spec.into("${project.buildDir}/distributions") + Copy collectArtifacts = project.tasks.create('collectArtifacts', Copy) + collectArtifacts.from(project.tasks.jar) + collectArtifacts.from(project.tasks.javadocJar) + collectArtifacts.from(project.tasks.sourcesJar) + collectArtifacts.into("${project.buildDir}/distributions") + collectArtifacts.dependsOn(pack) + distribution.dependsOn(collectArtifacts) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + Copy variantCollectArtifacts = project.tasks.create('collectArtifacts' + variant.getName(), Copy) + variantCollectArtifacts.from(project.tasks.getByName(variant.taskName('jar'))) + variantCollectArtifacts.from(project.tasks.getByName(variant.taskName('javadocJar'))) + variantCollectArtifacts.from(project.tasks.getByName(variant.taskName('sourcesJar'))) + variantCollectArtifacts.into("${project.buildDir}/distributions") + variantCollectArtifacts.dependsOn(pack) + distribution.dependsOn(variantCollectArtifacts) } } } @@ -394,86 +562,185 @@ class BuildPlugin implements Plugin { } } - private static void configureMaven(Project project) { - Task writePom = project.getTasks().create('writePom') - writePom.doLast { - MavenPluginConvention convention = project.getConvention().getPlugins().get('maven') as MavenPluginConvention - MavenPom pom = customizePom(convention.pom(), project) - pom.writeTo("${project.buildDir}/distributions/${project.archivesBaseName}-${project.version}.pom") - } + private void configureMaven(Project project) { + project.getPluginManager().apply("maven-publish") - // Write the pom when building a distribution. 
- Task distribution = project.getTasks().getByName('distribution') - distribution.dependsOn(writePom) + // Configure Maven publication + project.publishing { + publications { + main(MavenPublication) { + from project.components.java + suppressAllPomMetadataWarnings() // We get it. Gradle metadata is better than Maven Poms + } + } + repositories { + maven { + name = 'build' + url = "file://${project.buildDir}/repo" + } + } + } - // Get the task that installs to local maven repo. Instruct the installation resolver to use our custom pom. - Upload mavenInstallTask = project.getTasks().getByName('install') as Upload - MavenResolver installResolver = mavenInstallTask.repositories.mavenInstaller as MavenResolver - installResolver.setPom(customizePom(installResolver.getPom(), project)) - } + // Configure Maven Pom + configurePom(project, project.publishing.publications.main) - /** - * Given a maven pom, customize it for our project's using the information provided by the given project. - * @param pom - * @param gradleProject - * @return - */ - private static MavenPom customizePom(MavenPom pom, Project gradleProject) { - // Maven does most of the lifting to translate a Project into a MavenPom - // Run this closure after that initial boilerplate configuration is done - pom.whenConfigured { MavenPom generatedPom -> + // Disable the publishing tasks since we only need the pom generation tasks. + // If we are working with a project that has a scala variant (see below), we need to modify the pom's + // artifact id which the publish task does not like (it fails validation when run). + project.getTasks().withType(PublishToMavenRepository) { PublishToMavenRepository m -> + m.enabled = false + } - // eliminate test-scoped dependencies (no need in maven central poms) - generatedPom.dependencies.removeAll { dep -> - dep.scope == 'test' || dep.artifactId == 'elasticsearch-hadoop-mr' + // Configure Scala Variants if present + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + // Publishing gets weird when you introduce variants into the project. + // By default, when adding a spark/scala variant, its outgoing configurations are added to the main java components. + // The maven publish plugin will take all these variants, smoosh them and their dependencies together, and create + // one big pom file full of version conflicts. Since spark variants are mutually exclusive, we need to perform a + // workaround to materialize multiple poms for the different scala variants. + // TODO: Should this adhoc component configuration work be done in the SparkVariantPlugin? + + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + def javaComponent = project.components.java + + // Main variant needs the least configuration on its own, since it is the default publication created above. + sparkVariants.defaultVariant { SparkVariant variant -> + updateVariantPomLocationAndArtifactId(project, project.publishing.publications.main, variant) } - // Storm hosts their jars outside of maven central. 
- boolean storm = generatedPom.dependencies.any { it.groupId == 'org.apache.storm' } - - if (storm) - generatedPom.project { - repositories { - repository { - id = 'clojars.org' - url = 'https://clojars.org/repo' - } + // For each spark variant added, we need to do a few things: + sparkVariants.featureVariants { SparkVariant variant -> + // Collect all the outgoing configurations that are compatible with publication + def variantConfigurationsToExcludeFromMain = [ + variant.configuration("apiElements"), + variant.configuration("runtimeElements"), + variant.configuration('javadocElements'), + variant.configuration('sourcesElements'), + variant.configuration('test', 'apiElements'), + variant.configuration('test', 'runtimeElements'), + variant.configuration('itest', 'apiElements'), + variant.configuration('itest', 'runtimeElements') + ] + + // Remove each of those outgoing configurations from the default java component. + // This will keep the default variant from being smooshed together with conflicting artifacts/dependencies. + variantConfigurationsToExcludeFromMain.each { + javaComponent.withVariantsFromConfiguration(project.configurations.getByName(it)) { + skip() } } - // add all items necessary for publication - generatedPom.project { - name = gradleProject.description - description = gradleProject.description - url = 'http://github.com/elastic/elasticsearch-hadoop' - organization { - name = 'Elastic' - url = 'https://www.elastic.co/' + // Create an adhoc component for the variant + def variantComponent = softwareComponentFactory.adhoc("${variant.getName()}Component") + // Add it to the list of components that this project declares + project.components.add(variantComponent) + // Register the variant's outgoing configurations for publication + variantComponent.addVariantsFromConfiguration(project.configurations.getByName(variant.configuration("apiElements"))) { + it.mapToMavenScope("compile") } - licenses { - license { - name = 'The Apache Software License, Version 2.0' - url = 'https://www.apache.org/licenses/LICENSE-2.0.txt' - distribution = 'repo' - } + variantComponent.addVariantsFromConfiguration(project.configurations.getByName(variant.configuration("runtimeElements"))) { + it.mapToMavenScope("runtime") + } + variantComponent.addVariantsFromConfiguration(project.configurations.getByName(variant.configuration("javadocElements"))) { + it.mapToMavenScope("runtime") + } + variantComponent.addVariantsFromConfiguration(project.configurations.getByName(variant.configuration("sourcesElements"))) { + it.mapToMavenScope("runtime") } - scm { - url = 'https://github.com/elastic/elasticsearch-hadoop' - connection = 'scm:git:git://github.com/elastic/elasticsearch-hadoop' - developerConnection = 'scm:git:git://github.com/elastic/elasticsearch-hadoop' + + // Create a publication for this adhoc component to create pom generation and publishing tasks + project.publishing { + publications { + MavenPublication variantPublication = create(variant.getName(), MavenPublication) { + from variantComponent + suppressAllPomMetadataWarnings() // We get it. Gradle metadata is better than Maven Poms + } + configurePom(project, variantPublication) + updateVariantPomLocationAndArtifactId(project, variantPublication, variant) + } } - developers { - developer { - name = 'Elastic' - url = 'https://www.elastic.co' + } + } + + // Set the pom generation tasks as required for the distribution task. 
+ project.tasks.withType(GenerateMavenPom).all { GenerateMavenPom pom -> + project.getTasks().getByName('distribution').dependsOn(pom) + } + } + + private static void configurePom(Project project, MavenPublication publication) { + // Set the pom's destination to the distribution directory + project.tasks.withType(GenerateMavenPom).all { GenerateMavenPom pom -> + if (pom.name == "generatePomFileFor${publication.name.capitalize()}Publication") { + pom.destination = project.provider({"${project.buildDir}/distributions/${project.archivesBaseName}-${project.getVersion()}.pom"}) + } + } + + // add all items necessary for publication + Provider descriptionProvider = project.provider({ project.getDescription() }) + MavenPom pom = publication.getPom() + pom.name = descriptionProvider + pom.description = descriptionProvider + pom.url = 'http://github.com/elastic/elasticsearch-hadoop' + pom.organization { + name = 'Elastic' + url = 'https://www.elastic.co/' + } + pom.licenses { + license { + name = 'The Apache Software License, Version 2.0' + url = 'https://www.apache.org/licenses/LICENSE-2.0.txt' + distribution = 'repo' + } + } + pom.scm { + url = 'https://github.com/elastic/elasticsearch-hadoop' + connection = 'scm:git:git://github.com/elastic/elasticsearch-hadoop' + developerConnection = 'scm:git:git://github.com/elastic/elasticsearch-hadoop' + } + pom.developers { + developer { + name = 'Elastic' + url = 'https://www.elastic.co' + } + } + + publication.getPom().withXml { XmlProvider xml -> + // add all items necessary for publication + Node root = xml.asNode() + + // If we have embedded configuration on the project, remove its dependencies from the dependency nodes + NodeList dependenciesNode = root.get("dependencies") as NodeList + Configuration embedded = project.getConfigurations().findByName('embedded') + if (embedded != null) { + embedded.getAllDependencies().all { Dependency dependency -> + Iterator dependenciesIterator = dependenciesNode.get(0).children().iterator() + while (dependenciesIterator.hasNext()) { + Node dependencyNode = dependenciesIterator.next() + String artifact = dependencyNode.get("artifactId").text() + if (artifact == dependency.getName()) { + dependenciesIterator.remove() + break + } } } } + } + } - groupId = "org.elasticsearch" - artifactId = gradleProject.archivesBaseName + private static void updateVariantPomLocationAndArtifactId(Project project, MavenPublication publication, SparkVariant variant) { + // Fix the pom name + project.tasks.withType(GenerateMavenPom).all { GenerateMavenPom pom -> + if (pom.name == "generatePomFileFor${publication.name.capitalize()}Publication") { + pom.destination = project.provider({"${project.buildDir}/distributions/${project.archivesBaseName}_${variant.scalaMajorVersion}-${project.getVersion()}.pom"}) + } + } + // Fix the artifactId. Note: The publishing task does not like this happening. Hence it is disabled. 
+ publication.getPom().withXml { XmlProvider xml -> + Node root = xml.asNode() + Node artifactId = (root.get('artifactId') as NodeList).get(0) as Node + artifactId.setValue("${project.archivesBaseName}_${variant.scalaMajorVersion}") } - return pom } /** @@ -483,55 +750,88 @@ class BuildPlugin implements Plugin { */ private static void configureIntegrationTestTask(Project project) { if (project != project.rootProject) { - TaskProvider itestJar = project.tasks.register('itestJar', Jar) { Jar itestJar -> - itestJar.dependsOn(project.tasks.getByName('jar')) - itestJar.getArchiveClassifier().set('testing') - - // Add this project's classes to the testing uber-jar - itestJar.from(project.sourceSets.main.output) - itestJar.from(project.sourceSets.test.output) - itestJar.from(project.sourceSets.itest.output) - } - - Test integrationTest = project.tasks.create('integrationTest', StandaloneRestIntegTestTask.class) - integrationTest.dependsOn(itestJar) - - itestJar.configure { Jar jar -> - integrationTest.doFirst { - integrationTest.systemProperty("es.hadoop.job.jar", jar.getArchiveFile().get().asFile.absolutePath) + SourceSetContainer sourceSets = project.sourceSets + SourceSet mainSourceSet = sourceSets.main + SourceSet itestSourceSet = sourceSets.itest + String itestJarTaskName = 'itestJar' + String jarTaskName = 'jar' + String itestJarClassifier = 'testing' + String itestTaskName = 'integrationTest' + + createItestTask(project, mainSourceSet, itestSourceSet, itestJarTaskName, jarTaskName, itestJarClassifier, itestTaskName) + project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded { + SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class) + sparkVariants.featureVariants { SparkVariant variant -> + createItestTask(project, + sourceSets.getByName(variant.getSourceSetName('main')), + sourceSets.getByName(variant.getSourceSetName('itest')), + variant.taskName(itestJarTaskName), + variant.taskName(jarTaskName), + variant.getName() + "-" + itestJarClassifier, + variant.itestTaskName() + ) } } - integrationTest.testClassesDirs = project.sourceSets.itest.output.classesDirs - integrationTest.classpath = project.sourceSets.itest.runtimeClasspath - integrationTest.excludes = ["**/Abstract*.class"] + // Only add cluster settings if it's not the root project + project.logger.info "Configuring ${project.name} integrationTest task to use ES Fixture" + // Create the cluster fixture around the integration test. 
+ // There's probably a more elegant way to do this in Gradle + project.plugins.apply("es.hadoop.cluster") + } + } + + private static Test createItestTask(Project project, SourceSet mainSourceSet, SourceSet itestSourceSet, + String itestJarTaskName, String jarTaskName, String itestJarClassifier, + String itestTaskName) { + TaskProvider itestJar = project.tasks.register(itestJarTaskName, Jar) { Jar itestJar -> + itestJar.dependsOn(project.tasks.getByName(jarTaskName)) + itestJar.getArchiveClassifier().set(itestJarClassifier) - integrationTest.ignoreFailures = false + // Add this project's classes to the testing uber-jar + itestJar.from(mainSourceSet.output) + itestJar.from(itestSourceSet.output) + } - integrationTest.executable = "${project.ext.get('runtimeJavaHome')}/bin/java" - integrationTest.minHeapSize = "256m" - integrationTest.maxHeapSize = "2g" + Test integrationTest = project.tasks.create(itestTaskName, StandaloneRestIntegTestTask.class) - integrationTest.testLogging { - displayGranularity 0 - events "started", "failed" //, "standardOut", "standardError" - exceptionFormat "full" - showCauses true - showExceptions true - showStackTraces true - stackTraceFilters "groovy" - minGranularity 2 - maxGranularity 2 + itestJar.configure { Jar jar -> + integrationTest.doFirst { + integrationTest.systemProperty("es.hadoop.job.jar", jar.getArchiveFile().get().asFile.absolutePath) } + } - integrationTest.reports.html.enabled = false + integrationTest.dependsOn(itestJar) + integrationTest.testClassesDirs = itestSourceSet.output.classesDirs + integrationTest.classpath = itestSourceSet.runtimeClasspath + commonItestTaskConfiguration(project, integrationTest) + // TODO: Should this be the case? It is in Elasticsearch, but we may have to update some CI jobs? + project.tasks.check.dependsOn(integrationTest) + return integrationTest + } - // Only add cluster settings if it's not the root project - project.logger.info "Configuring ${project.name} integrationTest task to use ES Fixture" - // Create the cluster fixture around the integration test. 
- // There's probably a more elegant way to do this in Gradle - project.plugins.apply("es.hadoop.cluster") + private static void commonItestTaskConfiguration(Project project, Test integrationTest) { + integrationTest.excludes = ["**/Abstract*.class"] + + integrationTest.ignoreFailures = false + + integrationTest.executable = "${project.ext.get('runtimeJavaHome')}/bin/java" + integrationTest.minHeapSize = "256m" + integrationTest.maxHeapSize = "2g" + + integrationTest.testLogging { + displayGranularity 0 + events "started", "failed" //, "standardOut", "standardError" + exceptionFormat "full" + showCauses true + showExceptions true + showStackTraces true + stackTraceFilters "groovy" + minGranularity 2 + maxGranularity 2 } + + integrationTest.reports.html.enabled = false } private static void configurePrecommit(Project project) { diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/ElasticsearchFixturePlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/ElasticsearchFixturePlugin.groovy index f59aac939..82f2b4dd9 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/ElasticsearchFixturePlugin.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/ElasticsearchFixturePlugin.groovy @@ -7,6 +7,7 @@ import org.elasticsearch.gradle.testclusters.TestDistribution import org.gradle.api.NamedDomainObjectContainer import org.gradle.api.Plugin import org.gradle.api.Project +import org.gradle.api.tasks.TaskCollection import org.gradle.process.CommandLineArgumentProvider /** @@ -30,26 +31,28 @@ class ElasticsearchFixturePlugin implements Plugin { def hasLocalRepo = project.hasProperty("localRepo") def useFixture = !hasLocalRepo && Boolean.parseBoolean(project.findProperty("tests.fixture.es.enable") ?: "true") - def integrationTestTask = project.tasks.getByName("integrationTest") as StandaloneRestIntegTestTask + def integrationTestTasks = project.tasks.withType(StandaloneRestIntegTestTask) if (useFixture) { // Depends on project already containing an "integrationTest" // task, as well as javaHome+runtimeJavaHome configured - createClusterFor(integrationTestTask, project, version) + createClusterFor(integrationTestTasks, project, version) } else { - integrationTestTask.systemProperty("test.disable.local.es", "true") + integrationTestTasks.systemProperty("test.disable.local.es", "true") } } - private static void createClusterFor(StandaloneRestIntegTestTask integrationTest, Project project, String version) { + private static void createClusterFor(TaskCollection integrationTests, Project project, String version) { def clustersContainer = project.extensions.getByName(TestClustersPlugin.EXTENSION_NAME) as NamedDomainObjectContainer def integTestCluster = clustersContainer.create("integTest") { ElasticsearchCluster cluster -> cluster.version = version cluster.testDistribution = TestDistribution.DEFAULT } - integrationTest.useCluster(integTestCluster) - // Add the cluster HTTP URI as a system property which isn't tracked as a task input - integrationTest.jvmArgumentProviders.add({ ["-Dtests.rest.cluster=${integTestCluster.httpSocketURI}"] } as CommandLineArgumentProvider) + integrationTests.all { StandaloneRestIntegTestTask integrationTest -> + integrationTest.useCluster(integTestCluster) + // Add the cluster HTTP URI as a system property which isn't tracked as a task input + integrationTest.jvmArgumentProviders.add({ ["-Dtests.rest.cluster=${integTestCluster.httpSocketURI}"] } as CommandLineArgumentProvider) + } // 
Version settings def majorVersion = version.tokenize(".").get(0).toInteger() diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/scala/ScalaVariantPlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/scala/ScalaVariantPlugin.groovy deleted file mode 100644 index ce08b44df..000000000 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/scala/ScalaVariantPlugin.groovy +++ /dev/null @@ -1,172 +0,0 @@ -package org.elasticsearch.hadoop.gradle.scala - - -import org.gradle.api.GradleException -import org.gradle.api.Plugin -import org.gradle.api.Project -import org.gradle.api.Task -import org.gradle.api.internal.DefaultDomainObjectSet -import org.gradle.api.plugins.scala.ScalaBasePlugin -import org.gradle.api.tasks.Exec -import org.gradle.api.tasks.javadoc.Javadoc -import org.gradle.api.tasks.testing.Test -import org.gradle.internal.os.OperatingSystem - -class ScalaVariantPlugin implements Plugin { - - private static final String NESTED_BUILD_RUN = 'scala.variant' - - @Override - void apply(Project project) { - // Ensure plugins - project.getPluginManager().apply(ScalaBasePlugin.class) - - // Make a container for scala versions - ScalaVariantExtension variantExtension = project.getExtensions().create('variants', ScalaVariantExtension.class, project) - - // Add a lifecycle task called crossBuild - Task crossBuild = project.getTasks().create('variants') - - // Current build is not a nested build: - if (isRegularBuild(project)) { - // Wire variants into the build - Task distribution = project.getTasks().getByName('distribution') - distribution.dependsOn(crossBuild) - - // For all variants make a crossBuild#variant per variant version - variantExtension.variants.all { String variantVersion -> - String variantBaseVersion = baseVersionFromFullVersion(variantVersion) - Exec crossBuildForVariant = project.getTasks().create("variants#${variantBaseVersion.replace('.', '_')}", Exec.class) - - // The crossBuild runs the distribution task with a different scala property, and 'nestedRun' set to true - Map properties = new HashMap<>() - properties.put(NESTED_BUILD_RUN, variantVersion) - properties.put('shush', 'true') - if (project.properties.containsKey("localRepo")) { - properties.put('localRepo', 'true') - } - - Map systemProperties = new HashMap<>() - systemProperties.put('build.snapshot', System.getProperty('build.snapshot', 'true')) - - if (OperatingSystem.current().isWindows()) { - crossBuildForVariant.executable('gradlew.bat') - } else { - crossBuildForVariant.executable('./gradlew') - } - - crossBuildForVariant.args(distribution.getPath()) - crossBuildForVariant.args(properties.collect { key, val -> "-P${key}=${val}" }) - crossBuildForVariant.args(systemProperties.collect { key, val -> "-D${key}=${val}" }) - crossBuildForVariant.args('-S') - if (project.logger.isDebugEnabled()) { - crossBuildForVariant.args('--debug') - } else if (project.logger.isInfoEnabled()) { - crossBuildForVariant.args('--info') - } - crossBuildForVariant.workingDir(project.rootDir) - - // The crossBuild depends on each variant build - crossBuild.dependsOn(crossBuildForVariant) - } - } - - // Sense if we're doing a nested run. If so, use the variant version instead of the extension's default version - // for the version property. - if (isNestedRun(project)) { - String configuredVersion = project.getProperties().get(NESTED_BUILD_RUN).toString() - - project.logger.info("Cross-Building scala variant of [$configuredVersion]. 
" + - "Ignoring default version...") - variantExtension.setDefaultVersion(configuredVersion) - - // The crossBuild is disabled if we're doing a 'nestedRun' - crossBuild.setEnabled(false) - - String variantSuffix = (project.ext.scalaMajorVersion as String).replace('.', '') - - // When working with a variant use a different folder to cache the artifacts between builds - project.sourceSets.each { - it.java.outputDir = project.file(it.java.outputDir.absolutePath.replaceAll("classes", "classes.${variantSuffix}")) - it.scala.outputDir = project.file(it.scala.outputDir.absolutePath.replaceAll("classes", "classes.${variantSuffix}")) - } - - Javadoc javadoc = project.getTasks().getByName('javadoc') as Javadoc - javadoc.setDestinationDir(project.file("${project.docsDir}/javadoc-${variantSuffix}")) - - Test integrationTest = project.getTasks().getByName('integrationTest') as Test - integrationTest.setTestClassesDirs(project.sourceSets.itest.output.classesDirs) - } - } - - static boolean isRegularBuild(Project project) { - return !isNestedRun(project) - } - - static boolean isNestedRun(Project project) { - return project.hasProperty(NESTED_BUILD_RUN) - } - - static class ScalaVariantExtension { - private final Project project - protected DefaultDomainObjectSet variants = new DefaultDomainObjectSet<>(String.class) - protected String defaultVersion - protected String defaultBaseVersion - - ScalaVariantExtension(Project project) { - this.project = project - this.defaultVersion = null - } - - void setTargetVersions(String... versions) { - List toAdd = versions as List - if (defaultBaseVersion != null) { - toAdd.removeAll { (baseVersionFromFullVersion(it) == defaultBaseVersion) } - } - variants.addAll(toAdd) - } - - void targetVersions(String... versions) { - setTargetVersions(versions) - } - - void setDefaultVersion(String version) { - if (defaultVersion != null) { - // Ignore new values after being set. - project.logger.warn("Ignoring default version of [$version] as it is already configured as [$defaultVersion]") - return - } - - // Set the version string - defaultVersion = version - defaultBaseVersion = baseVersionFromFullVersion(version) - - // Remove any versions from variants that match - variants.removeAll { (baseVersionFromFullVersion(it) == defaultBaseVersion) } - - // Configure project properties to contain the scala versions - project.ext.scalaVersion = defaultVersion - project.ext.scalaMajorVersion = defaultBaseVersion - - // Set the major version on the archives base name. - project.archivesBaseName += "_${project.ext.scalaMajorVersion}" - } - - void defaultVersion(String version) { - setDefaultVersion(version) - } - } - -/** - * Takes an epoch.major.minor version and returns the epoch.major version form of it. - * @return - */ - static String baseVersionFromFullVersion(String fullVersion) { - List versionParts = fullVersion.tokenize('.') - if (versionParts.size() != 3) { - throw new GradleException("Invalid Scala Version - Version [$fullVersion] is not a full scala version (epoch.major.minor).") - } - return versionParts.init().join('.') - } - -} diff --git a/buildSrc/src/main/java/org/elasticsearch/hadoop/gradle/scala/SparkVariantPlugin.java b/buildSrc/src/main/java/org/elasticsearch/hadoop/gradle/scala/SparkVariantPlugin.java new file mode 100644 index 000000000..e95375e4c --- /dev/null +++ b/buildSrc/src/main/java/org/elasticsearch/hadoop/gradle/scala/SparkVariantPlugin.java @@ -0,0 +1,460 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.hadoop.gradle.scala; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import groovy.lang.Closure; +import org.codehaus.groovy.runtime.InvokerHelper; +import org.codehaus.groovy.runtime.StringGroovyMethods; +import org.gradle.api.Action; +import org.gradle.api.GradleException; +import org.gradle.api.NamedDomainObjectSet; +import org.gradle.api.Plugin; +import org.gradle.api.Project; +import org.gradle.api.Task; +import org.gradle.api.artifacts.ComponentMetadataContext; +import org.gradle.api.artifacts.ComponentMetadataRule; +import org.gradle.api.artifacts.Configuration; +import org.gradle.api.artifacts.ConfigurationContainer; +import org.gradle.api.file.ConfigurableFileCollection; +import org.gradle.api.file.SourceDirectorySet; +import org.gradle.api.internal.artifacts.repositories.resolver.ComponentMetadataDetailsAdapter; +import org.gradle.api.internal.tasks.DefaultScalaSourceSet; +import org.gradle.api.plugins.BasePlugin; +import org.gradle.api.plugins.Convention; +import org.gradle.api.plugins.JavaBasePlugin; +import org.gradle.api.plugins.JavaPlugin; +import org.gradle.api.plugins.JavaPluginConvention; +import org.gradle.api.plugins.JavaPluginExtension; +import org.gradle.api.tasks.SourceSet; +import org.gradle.api.tasks.SourceSetContainer; +import org.gradle.api.tasks.TaskCollection; +import org.gradle.api.tasks.TaskContainer; +import org.gradle.api.tasks.TaskProvider; +import org.gradle.api.tasks.bundling.Jar; +import org.gradle.api.tasks.scala.ScalaDoc; +import org.gradle.api.tasks.testing.Test; +import org.gradle.util.ConfigureUtil; + +import static org.gradle.api.plugins.JavaBasePlugin.DOCUMENTATION_GROUP; +import static org.gradle.api.plugins.JavaBasePlugin.VERIFICATION_GROUP; +import static org.gradle.api.plugins.JavaPlugin.API_ELEMENTS_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.COMPILE_CLASSPATH_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.COMPILE_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.IMPLEMENTATION_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.RUNTIME_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.RUNTIME_ELEMENTS_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.RUNTIME_ONLY_CONFIGURATION_NAME; +import static org.gradle.api.plugins.JavaPlugin.TEST_TASK_NAME; +import static org.gradle.api.plugins.scala.ScalaPlugin.SCALA_DOC_TASK_NAME; +import static org.gradle.api.tasks.SourceSet.MAIN_SOURCE_SET_NAME; +import static org.gradle.api.tasks.SourceSet.TEST_SOURCE_SET_NAME; + +public class SparkVariantPlugin implements Plugin { + + public static class SparkVariant { + + private final 
CharSequence name; + private final boolean isDefaultVariant; + private final String sparkVersion; + private final String scalaVersion; + private final String scalaMajorVersion; + private final String capability; + + public SparkVariant(String name) { + throw new GradleException("Cannot create variant named [" + name + "]. Do not instantiate objects directly. " + + "You must configure this via the SparkVariantPluginExtension."); + } + + public SparkVariant(CharSequence name, boolean isDefaultVariant, String sparkVersion, String scalaVersion, String capability) { + this.name = name; + this.isDefaultVariant = isDefaultVariant; + this.sparkVersion = sparkVersion; + this.scalaVersion = scalaVersion; + this.scalaMajorVersion = scalaVersion.substring(0, scalaVersion.lastIndexOf('.')); + this.capability = capability; + } + + public String getName() { + return name.toString(); + } + + public String getVariantName(String prefix) { + return prefix + StringGroovyMethods.capitalize(name); + } + + public boolean isDefaultVariant() { + return isDefaultVariant; + } + + public String getSparkVersion() { + return sparkVersion; + } + + public String getScalaVersion() { + return scalaVersion; + } + + public String getScalaMajorVersion() { + return scalaMajorVersion; + } + + public String getCapability() { + return capability; + } + + public String getSourceSetName(String sourceSetName) { + String result; + if (isDefaultVariant) { + result = sourceSetName; + } else { + if (MAIN_SOURCE_SET_NAME.equals(sourceSetName)) { + result = name.toString(); + } else { + result = sourceSetName + StringGroovyMethods.capitalize(name); + } + } + return result; + } + + public String configuration(CharSequence configurationName) { + return configuration(MAIN_SOURCE_SET_NAME, configurationName); + } + + public String configuration(String sourceSet, CharSequence configurationName) { + String result; + if (isDefaultVariant && MAIN_SOURCE_SET_NAME.equals(sourceSet)) { + result = configurationName.toString(); + } else { + result = getSourceSetName(sourceSet) + StringGroovyMethods.capitalize(configurationName); + } + return result; + } + + public String taskName(CharSequence taskName) { + return isDefaultVariant ? taskName.toString() : name + StringGroovyMethods.capitalize(taskName); + } + + public String testTaskName() { + return isDefaultVariant ? TEST_TASK_NAME : TEST_TASK_NAME + StringGroovyMethods.capitalize(name); + } + + public String itestTaskName() { + return isDefaultVariant ? 
"integrationTest" : "integrationTest" + StringGroovyMethods.capitalize(name); + } + + public String getCapabilityName(Object version) { + return capability + ":" + getName() + ":" + version.toString(); + } + } + + public static class SparkVariantPluginExtension { + + private final NamedDomainObjectSet variants; + private String capability = null; + private SparkVariant defaultVariant = null; + + public SparkVariantPluginExtension(Project project) { + this.variants = project.container(SparkVariant.class); + } + + public void capabilityGroup(String capability) { + this.capability = capability; + } + + public SparkVariant setDefaultVariant(String variantName, String sparkVersion, String scalaVersion) { + if (defaultVariant != null) { + throw new GradleException("Cannot set default variant multiple times"); + } + if (capability == null) { + throw new GradleException("Must set capability group before adding variant definitions"); + } + defaultVariant = new SparkVariant(variantName, true, sparkVersion, scalaVersion, capability); + variants.add(defaultVariant); + return defaultVariant; + } + + public SparkVariant addFeatureVariant(String variantName, String sparkVersion, String scalaVersion) { + if (capability == null) { + throw new GradleException("Must set capability group before adding variant definitions"); + } + SparkVariant variant = new SparkVariant(variantName, false, sparkVersion, scalaVersion, capability); + variants.add(variant); + return variant; + } + + public void all(Closure configure) { + all(ConfigureUtil.configureUsing(configure)); + } + + public void all(Action action) { + variants.all(action); + } + + public void defaultVariant(Closure configure) { + defaultVariant(ConfigureUtil.configureUsing(configure)); + } + + public void defaultVariant(Action action) { + variants.matching(SparkVariant::isDefaultVariant).all(action); + } + + public void featureVariants(Closure configure) { + featureVariants(ConfigureUtil.configureUsing(configure)); + } + + public void featureVariants(Action action) { + variants.matching(element -> !element.isDefaultVariant()).all(action); + } + + public SparkVariant featureVariant(String featureVariant, Closure configure) { + return featureVariant(featureVariant, ConfigureUtil.configureUsing(configure)); + } + + public SparkVariant featureVariant(String featureVariant, Action action) { + return variants.getByName(featureVariant, action); + } + } + + /** + * A rule that takes in a dependency component, checks if it is a distribution of the scala-library, and annotates it with a capability. 
+ */ + public static class ScalaRuntimeCapability implements ComponentMetadataRule { + private final static String SCALA_LIBRARY = "scala-library"; + + @Override + public void execute(ComponentMetadataContext componentMetadataContext) { + if (componentMetadataContext.getDetails() instanceof ComponentMetadataDetailsAdapter) { + final ComponentMetadataDetailsAdapter details = (ComponentMetadataDetailsAdapter) componentMetadataContext.getDetails(); + if (SCALA_LIBRARY.equals(details.getId().getName())) { + details.allVariants(variantMetadata -> { + variantMetadata.withCapabilities(capabilityMetadata -> { + capabilityMetadata.addCapability("org.elasticsearch.gradle", SCALA_LIBRARY, details.getId().getVersion()); + }); + }); + } + } + } + } + + // TODO: address deprecated configuration names + private static List TEST_CONFIGURATIONS_EXTENDED = Arrays.asList( + COMPILE_CONFIGURATION_NAME, + IMPLEMENTATION_CONFIGURATION_NAME, + RUNTIME_CONFIGURATION_NAME, + RUNTIME_ONLY_CONFIGURATION_NAME + ); + + @Override + public void apply(final Project project) { + SparkVariantPluginExtension extension = project.getExtensions().create("sparkVariants", SparkVariantPluginExtension.class, project); + final JavaPluginConvention javaPluginConvention = project.getConvention().getPlugin(JavaPluginConvention.class); + final JavaPluginExtension javaPluginExtension = project.getExtensions().getByType(JavaPluginExtension.class); + + // Add a rule that annotates scala-library dependencies with the scala-library capability + project.getDependencies().getComponents().all(ScalaRuntimeCapability.class); + + extension.defaultVariant(sparkVariant -> configureDefaultVariant(project, sparkVariant, javaPluginExtension, javaPluginConvention)); + extension.featureVariants(sparkVariant -> configureVariant(project, sparkVariant, javaPluginExtension, javaPluginConvention)); + } + + private static void configureDefaultVariant(Project project, SparkVariant sparkVariant, JavaPluginExtension javaPluginExtension, + JavaPluginConvention javaPluginConvention) { + ConfigurationContainer configurations = project.getConfigurations(); + String capability = sparkVariant.getCapabilityName(project.getVersion()); + + Configuration apiElements = configurations.getByName(API_ELEMENTS_CONFIGURATION_NAME); + apiElements.getOutgoing().capability(capability); + + Configuration runtimeElements = configurations.getByName(RUNTIME_ELEMENTS_CONFIGURATION_NAME); + runtimeElements.getOutgoing().capability(capability); + + configureScalaJarClassifiers(project, sparkVariant); + } + + private static void configureVariant(Project project, SparkVariant sparkVariant, JavaPluginExtension javaPluginExtension, + JavaPluginConvention javaPluginConvention) { + SourceSetContainer sourceSets = javaPluginConvention.getSourceSets(); + ConfigurationContainer configurations = project.getConfigurations(); + TaskContainer tasks = project.getTasks(); + Object version = project.getVersion(); + + // Create a main and test source set for this variant + SourceSet main = createVariantSourceSet(sparkVariant, sourceSets, MAIN_SOURCE_SET_NAME); + + // Register our main source set as a variant in the project + registerMainVariant(javaPluginExtension, sparkVariant, main, version); + + // Register a test source set as an additional variant source set that extends from main + SourceSet test = configureAdditionalVariantSourceSet(project, sparkVariant, javaPluginExtension, sourceSets, + configurations, version, TEST_SOURCE_SET_NAME); + + // Task Creation and Configuration + 
createVariantTestTask(tasks, sparkVariant, test); + configureVariantJar(tasks, sparkVariant); + registerVariantScaladoc(project, tasks, sparkVariant, main); + configureScalaJarClassifiers(project, sparkVariant); + } + + public static SourceSet configureAdditionalVariantSourceSet(Project project, SparkVariant sparkVariant, String sourceSetName) { + final JavaPluginConvention javaPluginConvention = project.getConvention().getPlugin(JavaPluginConvention.class); + final JavaPluginExtension javaPluginExtension = project.getExtensions().getByType(JavaPluginExtension.class); + SourceSetContainer sourceSets = javaPluginConvention.getSourceSets(); + ConfigurationContainer configurations = project.getConfigurations(); + String version = project.getVersion().toString(); + + return configureAdditionalVariantSourceSet(project, sparkVariant, javaPluginExtension, sourceSets, configurations, + version, sourceSetName); + } + + + private static SourceSet configureAdditionalVariantSourceSet(Project project, SparkVariant sparkVariant, JavaPluginExtension javaPluginExtension, + SourceSetContainer sourceSets, ConfigurationContainer configurations, Object version, + String sourceSetName) { + // Create the additional source set for this variant + SourceSet additional = createVariantSourceSet(sparkVariant, sourceSets, sourceSetName); + + // Each variant's test source set is registered like just another variant in Gradle. These variants do not get any of the special + // treatment needed in order to function like the testing part of a regular project. We need to do some basic wiring in the test + // source set ourselves in order to get there. + SourceSet main = sourceSets.getByName(sparkVariant.getSourceSetName(MAIN_SOURCE_SET_NAME)); + + configureAdditionalSourceSetClasspaths(project, configurations, sparkVariant, sourceSetName, additional, main); + + // Register variant and extend + registerAdditionalVariant(javaPluginExtension, sparkVariant, sourceSetName, additional, version); + extendMainConfigurations(configurations, sparkVariant, sourceSetName); + + return additional; + } + + private static SourceSet createVariantSourceSet(SparkVariant sparkVariant, SourceSetContainer sourceSets, String sourceSetName) { + SourceSet sourceSet = sourceSets.create(sparkVariant.getSourceSetName(sourceSetName)); + + SourceDirectorySet javaSourceSet = sourceSet.getJava(); + javaSourceSet.setSrcDirs(Collections.singletonList("src/" + sourceSetName + "/java")); + + SourceDirectorySet resourcesSourceSet = sourceSet.getResources(); + resourcesSourceSet.setSrcDirs(Collections.singletonList("src/" + sourceSetName + "/resources")); + + SourceDirectorySet scalaSourceSet = getScalaSourceSet(sourceSet).getScala(); + scalaSourceSet.setSrcDirs(Arrays.asList( + "src/" + sourceSetName + "/scala", + "src/" + sourceSetName + "/" + sparkVariant.getName() + )); + + return sourceSet; + } + + private static void configureAdditionalSourceSetClasspaths(Project project, ConfigurationContainer configurations, SparkVariant sparkVariant, + String sourceSetName, SourceSet additionalSourceSet, SourceSet mainSourceSet) { + String additionalCompileClasspathName = sparkVariant.configuration(sourceSetName, COMPILE_CLASSPATH_CONFIGURATION_NAME); + Configuration additionalCompileClasspath = configurations.getByName(additionalCompileClasspathName); + additionalSourceSet.setCompileClasspath((project.files(mainSourceSet.getOutput(), additionalCompileClasspath))); + + String additionalRuntimeClasspathName = sparkVariant.configuration(sourceSetName, 
RUNTIME_CLASSPATH_CONFIGURATION_NAME); + Configuration additionalRuntimeClasspath = configurations.getByName(additionalRuntimeClasspathName); + additionalSourceSet.setRuntimeClasspath(project.files(additionalSourceSet.getOutput(), mainSourceSet.getOutput(), additionalRuntimeClasspath)); + } + + private static DefaultScalaSourceSet getScalaSourceSet(SourceSet sourceSet) { + Convention sourceSetConvention = (Convention) InvokerHelper.getProperty(sourceSet, "convention"); + return (DefaultScalaSourceSet) sourceSetConvention.getPlugins().get("scala"); + } + + private static void registerMainVariant(JavaPluginExtension java, SparkVariant sparkVariant, SourceSet main, Object version) { + java.registerFeature(sparkVariant.getName(), featureSpec -> { + featureSpec.usingSourceSet(main); + featureSpec.capability(sparkVariant.getCapability(), sparkVariant.getName(), version.toString()); + featureSpec.withJavadocJar(); + featureSpec.withSourcesJar(); + }); + } + + private static void registerAdditionalVariant(JavaPluginExtension java, SparkVariant sparkVariant, String sourceSetName, SourceSet additional, Object version) { + java.registerFeature(sparkVariant.getVariantName(sourceSetName), featureSpec -> { + featureSpec.usingSourceSet(additional); + featureSpec.capability(sparkVariant.getCapability(), sparkVariant.getVariantName(sourceSetName), version.toString()); + }); + } + + private static void extendMainConfigurations(ConfigurationContainer configurations, SparkVariant sparkVariant, String testSourceSetName) { + for (String configurationName : TEST_CONFIGURATIONS_EXTENDED) { + Configuration mainConfiguration = configurations.getByName(sparkVariant.configuration(MAIN_SOURCE_SET_NAME, configurationName)); + Configuration testConfiguration = configurations.getByName(sparkVariant.configuration(testSourceSetName, configurationName)); + testConfiguration.extendsFrom(mainConfiguration); + } + } + + private static void createVariantTestTask(TaskContainer tasks, SparkVariant sparkVariant, SourceSet test) { + Test variantTestTask = tasks.create(sparkVariant.testTaskName(), Test.class); + variantTestTask.setGroup(VERIFICATION_GROUP); + variantTestTask.setTestClassesDirs(test.getOutput().getClassesDirs()); + variantTestTask.setClasspath(test.getRuntimeClasspath()); + + Task check = tasks.getByName(JavaBasePlugin.CHECK_TASK_NAME); + check.dependsOn(variantTestTask); + } + + private static void configureVariantJar(TaskContainer tasks, SparkVariant sparkVariant) { + Task build = tasks.getByName(BasePlugin.ASSEMBLE_TASK_NAME); + build.dependsOn(sparkVariant.taskName(JavaPlugin.JAR_TASK_NAME)); + } + + private static void registerVariantScaladoc(Project project, TaskContainer tasks, SparkVariant sparkVariant, SourceSet main) { + TaskProvider scalaDocProvider = tasks.register(sparkVariant.taskName(SCALA_DOC_TASK_NAME), ScalaDoc.class); + scalaDocProvider.configure(scalaDoc -> { + scalaDoc.setGroup(DOCUMENTATION_GROUP); + scalaDoc.setDescription("Generates Scaladoc for the " + sparkVariant.getSourceSetName(MAIN_SOURCE_SET_NAME) + " source code."); + + ConfigurableFileCollection scaladocClasspath = project.files(); + scaladocClasspath.from(main.getOutput()); + scaladocClasspath.from(main.getCompileClasspath()); + + scalaDoc.setClasspath(scaladocClasspath); + scalaDoc.setSource(getScalaSourceSet(main).getScala()); + }); + } + + private static void correctScalaJarClassifiers(Jar jar, SparkVariant sparkVariant) { + if (sparkVariant.isDefaultVariant() == false) { + String classifier = jar.getArchiveClassifier().get(); + 
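// Strip the variant name (and any leading dash) out of the archive classifier here; e.g. a hypothetical
+            // "spark13scala210-sources" classifier would become plain "sources". +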
classifier = classifier.replace(sparkVariant.name, ""); + if (classifier.startsWith("-")) { + classifier = classifier.substring(1); + } + jar.getArchiveClassifier().set(classifier); + } + String baseName = jar.getArchiveBaseName().get(); + baseName = baseName + "_" + sparkVariant.scalaMajorVersion; + jar.getArchiveBaseName().set(baseName); + } + + private static void configureScalaJarClassifiers(Project project, final SparkVariant sparkVariant) { + TaskCollection jars = project.getTasks().withType(Jar.class); + correctScalaJarClassifiers(jars.getByName(sparkVariant.taskName("jar")), sparkVariant); + correctScalaJarClassifiers(jars.getByName(sparkVariant.taskName("javadocJar")), sparkVariant); + correctScalaJarClassifiers(jars.getByName(sparkVariant.taskName("sourcesJar")), sparkVariant); + } +} diff --git a/buildSrc/src/main/resources/META-INF/gradle-plugins/scala.variants.properties b/buildSrc/src/main/resources/META-INF/gradle-plugins/scala.variants.properties deleted file mode 100644 index 5ed3fab0a..000000000 --- a/buildSrc/src/main/resources/META-INF/gradle-plugins/scala.variants.properties +++ /dev/null @@ -1 +0,0 @@ -implementation-class=org.elasticsearch.hadoop.gradle.scala.ScalaVariantPlugin diff --git a/buildSrc/src/main/resources/META-INF/gradle-plugins/spark.variants.properties b/buildSrc/src/main/resources/META-INF/gradle-plugins/spark.variants.properties new file mode 100644 index 000000000..c14446772 --- /dev/null +++ b/buildSrc/src/main/resources/META-INF/gradle-plugins/spark.variants.properties @@ -0,0 +1 @@ +implementation-class=org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin diff --git a/dist/build.gradle b/dist/build.gradle index e6e5cbe28..f69d21118 100644 --- a/dist/build.gradle +++ b/dist/build.gradle @@ -6,6 +6,8 @@ apply plugin: 'es.hadoop.build' description = "Elasticsearch for Apache Hadoop" project.archivesBaseName = 'elasticsearch-hadoop' +def sparkVariantIncluded = 'spark20scala211' + configurations { embedded { canBeResolved = true @@ -30,18 +32,26 @@ configurations { def distProjects = [":elasticsearch-hadoop-mr", ":elasticsearch-hadoop-hive", ":elasticsearch-hadoop-pig", ":elasticsearch-spark-20", ":elasticsearch-storm"] + distProjects.each { distProject -> + def configureDistDependency = { Dependency dependency -> + if (distProject == ":elasticsearch-spark-20") { + dependency.capabilities { + requireCapability("org.elasticsearch.spark.sql.variant:$sparkVariantIncluded:$project.version") + } + } + } dependencies { // This is only going to pull in each project's regular jar to create the project-wide uberjar. - embedded(project(distProject)) + add('embedded', project(distProject), configureDistDependency) // To squash Javadoc warnings. - javadocDependencies(project(distProject)) + add('javadocDependencies', project(distProject), configureDistDependency) // This will pull all java sources (including generated) for the project-wide javadoc. - javadocSources(project(distProject)) + add('javadocSources', project(distProject), configureDistDependency) // This will pull all non-generated sources for the project-wide source jar. - additionalSources(project(distProject)) + add('additionalSources', project(distProject), configureDistDependency) // This will pull in the regular jar, javadoc jar, and source jar to be packaged in the distribution. 
- dist(project(distProject)) + add('dist', project(distProject), configureDistDependency) } } @@ -109,6 +119,25 @@ javadoc { } } +publishing { + publications { + main { + getPom().withXml { XmlProvider xml -> + Node root = xml.asNode() + + // add clojars repo to pom + Node repositories = root.appendNode('repositories') + Node repository = repositories.appendNode('repository') + repository.appendNode('id', 'clojars.org') + repository.appendNode('url', 'https://clojars.org/repo') + + // Correct the artifact Id, otherwise it is listed as 'dist' + root.get('artifactId').get(0).setValue(project.archivesBaseName) + } + } + } +} + // Name of the directory under the root of the zip file that will contain the zip contents String zipContentDir = "elasticsearch-hadoop-${project.version}" diff --git a/gradle.properties b/gradle.properties index 3fa663499..795ccb9b4 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,14 +16,20 @@ pigVersion = 0.15.0 jodaVersion = 1.6 # note the versions here are tied to the ones in Hadoop distro - 1.8.8 jacksonVersion = 1.8.8 + # Spark spark13Version = 1.6.2 spark20Version = 2.3.0 +spark22Version = 2.2.3 +spark24Version = 2.4.4 + # same as Spark's scala210Version = 2.10.7 scala210MajorVersion = 2.10 scala211Version = 2.11.12 scala211MajorVersion = 2.11 +scala212Version = 2.12.8 +scala212MajorVersion = 2.12 stormVersion = 1.0.6 diff --git a/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mapping.json b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mapping.json new file mode 100644 index 000000000..258b12e5b --- /dev/null +++ b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mapping.json @@ -0,0 +1,9 @@ +{ + "data" : { + "properties" : { + "&t" : { + "type" : "date" + } + } + } +} \ No newline at end of file diff --git a/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mappingresponse.json b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mappingresponse.json new file mode 100644 index 000000000..da058a7f4 --- /dev/null +++ b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-mappingresponse.json @@ -0,0 +1,11 @@ +{ + "index": { + "mappings": { + "properties" : { + "&t" : { + "type" : "date" + } + } + } + } +} \ No newline at end of file diff --git a/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-source.json b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-source.json new file mode 100644 index 000000000..229f90793 --- /dev/null +++ b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-source.json @@ -0,0 +1,26 @@ +{ + "_scroll_id": "c2NhbjswOzE7dG90YWxfaGl0czoxOw==", + "took": 18, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "failed": 0 + }, + "hits": { + "total": 1, + "max_score": 0.0, + "hits": [ + { + "_index": "hive", + "_type": "date-as-long", + "_id": "1", + "_score": 0.0, + "_source": { + "type": 1, + "&t": 1407239910771 + } + } + ] + } +} diff --git a/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-typeless-mapping.json b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-typeless-mapping.json new file mode 100644 index 000000000..a1ae40809 --- /dev/null +++ b/hive/src/itest/resources/org/elasticsearch/hadoop/hive/hive-date-typeless-mapping.json @@ -0,0 +1,7 @@ +{ + "properties" : { + "&t" : { + "type" : "date" + } + } +} \ No newline at end of file diff --git a/licenses/protobuf-java-2.5.0.jar.sha1 b/licenses/protobuf-java-2.5.0.jar.sha1 new file 
mode 100644 index 000000000..71f918819 --- /dev/null +++ b/licenses/protobuf-java-2.5.0.jar.sha1 @@ -0,0 +1 @@ +a10732c76bfacdbd633a7eb0f7968b1059a65dfa \ No newline at end of file diff --git a/licenses/protobuf-java-LICENSE.txt b/licenses/protobuf-java-LICENSE.txt new file mode 100644 index 000000000..97a6e3d19 --- /dev/null +++ b/licenses/protobuf-java-LICENSE.txt @@ -0,0 +1,32 @@ +Copyright 2008 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. 
\ No newline at end of file diff --git a/licenses/protobuf-java-NOTICE.txt b/licenses/protobuf-java-NOTICE.txt new file mode 100644 index 000000000..e69de29bb diff --git a/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 b/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 deleted file mode 100644 index d4d7e2e72..000000000 --- a/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b1654b97bfe35e466e7511fd5b11033c2bbbd9 \ No newline at end of file diff --git a/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 b/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..c2a134ad3 --- /dev/null +++ b/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +ba9237eac7523f0d61e104bc6c35f01240020241 \ No newline at end of file diff --git a/licenses/spark-core_2.11-2.3.0.jar.sha1 b/licenses/spark-core_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 44379c147..000000000 --- a/licenses/spark-core_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -09e2bc021bd38b06da2e0a56fdd9d13935503d94 \ No newline at end of file diff --git a/licenses/spark-core_2.11-2.4.4.jar.sha1 b/licenses/spark-core_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..87bdd6969 --- /dev/null +++ b/licenses/spark-core_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +98226adb9bd1fb12479f5da1888e22b0fc89e1aa \ No newline at end of file diff --git a/licenses/spark-sql_2.11-2.3.0.jar.sha1 b/licenses/spark-sql_2.11-2.3.0.jar.sha1 deleted file mode 100644 index e519c1764..000000000 --- a/licenses/spark-sql_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e19282137294c8a889917c05006931fbcd8d66d2 \ No newline at end of file diff --git a/licenses/spark-sql_2.11-2.4.4.jar.sha1 b/licenses/spark-sql_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..c5633efa7 --- /dev/null +++ b/licenses/spark-sql_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +e6a748c169978b0d070002f7849e4edc9bbb3db4 \ No newline at end of file diff --git a/licenses/spark-streaming_2.11-2.3.0.jar.sha1 b/licenses/spark-streaming_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 133766241..000000000 --- a/licenses/spark-streaming_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -57da1135f7192a2be85987f1708abf94887f7323 \ No newline at end of file diff --git a/licenses/spark-streaming_2.11-2.4.4.jar.sha1 b/licenses/spark-streaming_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..20790a140 --- /dev/null +++ b/licenses/spark-streaming_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +447fbcc8e6799e072a22e2fb404f6f0ea6d1f143 \ No newline at end of file diff --git a/licenses/spark-yarn_2.11-2.3.0.jar.sha1 b/licenses/spark-yarn_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 1877f6a67..000000000 --- a/licenses/spark-yarn_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -dffb93bc7154b1242155585fd117425c40d70bd4 \ No newline at end of file diff --git a/licenses/spark-yarn_2.11-2.4.4.jar.sha1 b/licenses/spark-yarn_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..743c65fa7 --- /dev/null +++ b/licenses/spark-yarn_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +cc2bd27ca1ae0368e06e243e652a5d4f62258121 \ No newline at end of file diff --git a/qa/kerberos/build.gradle b/qa/kerberos/build.gradle index 3710de73e..6d68ff201 100644 --- a/qa/kerberos/build.gradle +++ b/qa/kerberos/build.gradle @@ -68,7 +68,12 @@ dependencies { implementation('org.scala-lang:scala-library:2.11.12') implementation('org.scala-lang:scala-reflect:2.11.12') - implementation(project(":elasticsearch-spark-20")) + implementation(project(":elasticsearch-spark-20")) { + capabilities { + // Spark 2.x on Scala 
2.11 + requireCapability("org.elasticsearch.spark.sql.variant:spark20scala211:$project.version") + } + } compileOnly("com.fasterxml.jackson.module:jackson-module-scala_2.11:2.6.7.1") compileOnly("com.fasterxml.jackson.core:jackson-annotations:2.6.7") diff --git a/settings.gradle b/settings.gradle index 9fabf76ac..d6c036396 100644 --- a/settings.gradle +++ b/settings.gradle @@ -7,6 +7,10 @@ project(":hive").name = "elasticsearch-hadoop-hive" include 'pig' project(":pig").name = "elasticsearch-hadoop-pig" +include 'spark-core' +project(":spark-core").projectDir = new File(settingsDir, "spark/core") +project(":spark-core").name = "elasticsearch-spark" + include 'sql-13' project(":sql-13").projectDir = new File(settingsDir, "spark/sql-13") project(":sql-13").name = "elasticsearch-spark-13" diff --git a/spark/core/build.gradle b/spark/core/build.gradle new file mode 100644 index 000000000..24799c45a --- /dev/null +++ b/spark/core/build.gradle @@ -0,0 +1,148 @@ +import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin + +description = "Elasticsearch Spark Core" + +apply plugin: 'java-library' +apply plugin: 'scala' +apply plugin: 'es.hadoop.build' +apply plugin: 'spark.variants' + +sparkVariants { + capabilityGroup 'org.elasticsearch.spark.variant' + setDefaultVariant "spark20scala211", spark24Version, scala211Version + addFeatureVariant "spark20scala210", spark22Version, scala210Version + addFeatureVariant "spark13scala211", spark13Version, scala211Version + addFeatureVariant "spark13scala210", spark13Version, scala210Version + + all { SparkVariantPlugin.SparkVariant variant -> + + String scalaCompileTaskName = project.sourceSets + .getByName(variant.getSourceSetName("main")) + .getCompileTaskName("scala") + + // Configure main compile task + project.getTasks().getByName(scalaCompileTaskName) { ScalaCompile compileScala -> + configure(compileScala.scalaCompileOptions.forkOptions) { + memoryMaximumSize = '1g' + jvmArgs = ['-XX:MaxPermSize=512m'] + } + compileScala.scalaCompileOptions.additionalParameters = [ + "-feature", + "-unchecked", + "-deprecation", + "-Xfuture", + "-Yno-adapted-args", + "-Ywarn-dead-code", + "-Ywarn-numeric-widen", + "-Xfatal-warnings" + ] + } + + dependencies { + add(variant.configuration('api'), "org.scala-lang:scala-library:${variant.scalaVersion}") + add(variant.configuration('api'), "org.scala-lang:scala-reflect:${variant.scalaVersion}") + add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:${variant.sparkVersion}") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } + + add(variant.configuration('implementation'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('implementation'), "commons-logging:commons-logging:1.1.1") + + add(variant.configuration('compileOnly'), "com.fasterxml.jackson.module:jackson-module-scala_${variant.scalaMajorVersion}:2.6.7.1") + add(variant.configuration('compileOnly'), "com.fasterxml.jackson.core:jackson-annotations:2.6.7") + add(variant.configuration('compileOnly'), "com.google.guava:guava:14.0.1") + add(variant.configuration('compileOnly'), "com.google.protobuf:protobuf-java:2.5.0") + add(variant.configuration('compileOnly'), "org.slf4j:slf4j-api:1.7.6") + + add(variant.configuration('test', 'implementation'), project(":test:shared")) + add(variant.configuration('test', 'implementation'), "com.esotericsoftware.kryo:kryo:2.21") + add(variant.configuration('test', 'implementation'), 
"org.apache.spark:spark-core_${variant.scalaMajorVersion}:${variant.sparkVersion}") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } + + add(variant.configuration('itest', 'implementation'), project(":test:shared")) + + if (variant.scalaMajorVersion == '2.10') { + add(variant.configuration('implementation'), "org.apache.spark:spark-unsafe_${variant.scalaMajorVersion}:${variant.sparkVersion}") + add(variant.configuration('implementation'), "org.apache.avro:avro:1.7.7") + add(variant.configuration('implementation'), "log4j:log4j:1.2.17") + add(variant.configuration('implementation'), "com.google.code.findbugs:jsr305:2.0.1") + add(variant.configuration('implementation'), "org.json4s:json4s-ast_2.10:3.2.10") + add(variant.configuration('implementation'), "com.esotericsoftware.kryo:kryo:2.21") + add(variant.configuration('compileOnly'), "org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}") + add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}") + add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}") + if (variant.sparkVersion == spark22Version) { + add(variant.configuration('compileOnly'), "org.apache.spark:spark-tags_${variant.scalaMajorVersion}:${variant.sparkVersion}") + } + } + + add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr")) + } + + def javaFilesOnly = { FileTreeElement spec -> + spec.file.name.endsWith('.java') || spec.isDirectory() + } + + // Add java files from scala source set to javadocSourceElements. + project.fileTree("src/main/scala").include(javaFilesOnly).each { + project.artifacts.add(variant.configuration('javadocSourceElements'), it) + } + + // Configure java source generation for javadoc purposes + if (variant.scalaMajorVersion != '2.10') { + String generatedJavaDirectory = "$buildDir/generated/java/${variant.name}" + Configuration scalaCompilerPlugin = project.configurations.maybeCreate(variant.configuration('scalaCompilerPlugin')) + scalaCompilerPlugin.defaultDependencies { dependencies -> + dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${variant.scalaVersion}:0.13")) + } + + ScalaCompile compileScala = tasks.getByName(scalaCompileTaskName) as ScalaCompile + compileScala.scalaCompileOptions.with { + additionalParameters = [ + "-Xplugin:" + configurations.getByName(variant.configuration('scalaCompilerPlugin')).asPath, + "-P:genjavadoc:out=$generatedJavaDirectory".toString() + ] + } + // Export generated Java code from the genjavadoc compiler plugin + artifacts { + add(variant.configuration('javadocSourceElements'), project.file(generatedJavaDirectory)) { + builtBy compileScala + } + } + tasks.getByName(variant.taskName('javadoc')) { + dependsOn compileScala + source(generatedJavaDirectory) + } + } + + scaladoc { + title = "${rootProject.description} ${version} API" + } + } +} + +// deal with the messy conflicts out there +// Ignore the scalaCompilerPlugin configurations since it is immediately resolved to configure the scala compiler tasks +configurations.matching{ it.name.contains('CompilerPlugin') == false }.all { Configuration conf -> + conf.resolutionStrategy { + eachDependency { details -> + // change all javax.servlet artifacts to the one used by Spark otherwise these will lead to + // SecurityException (signer information wrong) + if 
(details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) { + details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016" + } + } + } + conf.exclude group: "org.mortbay.jetty" +} + +// Set minimum compatibility and java home for compiler task +tasks.withType(ScalaCompile) { ScalaCompile task -> + task.sourceCompatibility = project.ext.minimumRuntimeVersion + task.targetCompatibility = project.ext.minimumRuntimeVersion + task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath +} diff --git a/spark/core/licenses/commons-logging-1.1.1.jar.sha1 b/spark/core/licenses/commons-logging-1.1.1.jar.sha1 new file mode 100644 index 000000000..66a2bd0ee --- /dev/null +++ b/spark/core/licenses/commons-logging-1.1.1.jar.sha1 @@ -0,0 +1 @@ +5043bfebc3db072ed80fbd362e7caf00e885d8ae \ No newline at end of file diff --git a/spark/core/licenses/commons-logging-LICENSE.txt b/spark/core/licenses/commons-logging-LICENSE.txt new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/spark/core/licenses/commons-logging-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/spark/core/licenses/commons-logging-NOTICE.txt b/spark/core/licenses/commons-logging-NOTICE.txt new file mode 100644 index 000000000..3d948f5a6 --- /dev/null +++ b/spark/core/licenses/commons-logging-NOTICE.txt @@ -0,0 +1,5 @@ +Apache Commons Logging +Copyright 2003-2007 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). diff --git a/spark/core/licenses/scala-LICENSE.txt b/spark/core/licenses/scala-LICENSE.txt new file mode 100644 index 000000000..5d42cf219 --- /dev/null +++ b/spark/core/licenses/scala-LICENSE.txt @@ -0,0 +1,29 @@ +Copyright (c) 2002-2017 EPFL + +Copyright (c) 2011-2017 Lightbend, Inc. + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the EPFL nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/spark/core/licenses/scala-NOTICE.txt b/spark/core/licenses/scala-NOTICE.txt new file mode 100644 index 000000000..6352026be --- /dev/null +++ b/spark/core/licenses/scala-NOTICE.txt @@ -0,0 +1,67 @@ +Scala is licensed under the [BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause). + +## Scala License + +Copyright (c) 2002-2017 EPFL + +Copyright (c) 2011-2017 Lightbend, Inc. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the EPFL nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# Other Licenses + +This software includes projects with the following licenses, +which are also included in the `licenses/` directory: + +### [Apache License](http://www.apache.org/licenses/LICENSE-2.0.html) +This license is used by the following third-party libraries: + + * jansi + +### [BSD License](http://www.opensource.org/licenses/bsd-license.php) +This license is used by the following third-party libraries: + + * jline + +### [BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause) +This license is used by the following third-party libraries: + + * asm + +### [MIT License](http://www.opensource.org/licenses/MIT) +This license is used by the following third-party libraries: + + * jquery + * jquery-ui + * jquery-layout + * sizzle + * tools tooltip + +### Public Domain +The following libraries are freely available in the public domain: + + * forkjoin diff --git a/spark/core/licenses/scala-library-2.11.12.jar.sha1 b/spark/core/licenses/scala-library-2.11.12.jar.sha1 new file mode 100644 index 000000000..f73135c59 --- /dev/null +++ b/spark/core/licenses/scala-library-2.11.12.jar.sha1 @@ -0,0 +1 @@ +bf5534e6fec3d665bd6419c952a929a8bdd4b591 \ No newline at end of file diff --git a/spark/core/licenses/scala-reflect-2.11.12.jar.sha1 b/spark/core/licenses/scala-reflect-2.11.12.jar.sha1 new file mode 100644 index 000000000..360573357 --- /dev/null +++ b/spark/core/licenses/scala-reflect-2.11.12.jar.sha1 @@ -0,0 +1 @@ +2bb23c13c527566d9828107ca4108be2a2c06f01 \ No newline at end of file diff --git a/spark/core/licenses/spark-LICENSE.txt b/spark/core/licenses/spark-LICENSE.txt new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/spark/core/licenses/spark-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/spark/core/licenses/spark-NOTICE.txt b/spark/core/licenses/spark-NOTICE.txt new file mode 100644 index 000000000..c662222e4 --- /dev/null +++ b/spark/core/licenses/spark-NOTICE.txt @@ -0,0 +1,28 @@ +Apache Spark +Copyright 2014 and onwards The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Export Control Notice +--------------------- + +This distribution includes cryptographic software. The country in which you currently reside may have +restrictions on the import, possession, use, and/or re-export to another country, of encryption software. +BEFORE using any encryption software, please check your country's laws, regulations and policies concerning +the import, possession, or use, and re-export of encryption software, to see if this is permitted. See + for more information. + +The U.S. 
Government Department of Commerce, Bureau of Industry and Security (BIS), has classified this +software as Export Commodity Control Number (ECCN) 5D002.C.1, which includes information security software +using or performing cryptographic functions with asymmetric algorithms. The form and manner of this Apache +Software Foundation distribution makes it eligible for export under the License Exception ENC Technology +Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, Section 740.13) for +both object code and source code. + +The following provides more details on the included cryptographic software: + +This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to +support authentication, and encryption and decryption of data sent across the network between +services. \ No newline at end of file diff --git a/spark/core/licenses/spark-core_2.11-2.4.4.jar.sha1 b/spark/core/licenses/spark-core_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..87bdd6969 --- /dev/null +++ b/spark/core/licenses/spark-core_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +98226adb9bd1fb12479f5da1888e22b0fc89e1aa \ No newline at end of file diff --git a/spark/core/itest/java/org/elasticsearch/spark/integration/AbstractHadoopBasicSparkTest.java b/spark/core/src/itest/java/org/elasticsearch/spark/integration/AbstractHadoopBasicSparkTest.java similarity index 100% rename from spark/core/itest/java/org/elasticsearch/spark/integration/AbstractHadoopBasicSparkTest.java rename to spark/core/src/itest/java/org/elasticsearch/spark/integration/AbstractHadoopBasicSparkTest.java diff --git a/spark/core/itest/java/org/elasticsearch/spark/integration/AbstractJavaEsSparkTest.java b/spark/core/src/itest/java/org/elasticsearch/spark/integration/AbstractJavaEsSparkTest.java similarity index 100% rename from spark/core/itest/java/org/elasticsearch/spark/integration/AbstractJavaEsSparkTest.java rename to spark/core/src/itest/java/org/elasticsearch/spark/integration/AbstractJavaEsSparkTest.java diff --git a/spark/core/itest/java/org/elasticsearch/spark/integration/SparkSuite.java b/spark/core/src/itest/java/org/elasticsearch/spark/integration/SparkSuite.java similarity index 100% rename from spark/core/itest/java/org/elasticsearch/spark/integration/SparkSuite.java rename to spark/core/src/itest/java/org/elasticsearch/spark/integration/SparkSuite.java diff --git a/spark/core/itest/java/org/elasticsearch/spark/integration/SparkUtils.java b/spark/core/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java similarity index 100% rename from spark/core/itest/java/org/elasticsearch/spark/integration/SparkUtils.java rename to spark/core/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java diff --git a/spark/core/itest/resources/basic.json b/spark/core/src/itest/resources/basic.json similarity index 100% rename from spark/core/itest/resources/basic.json rename to spark/core/src/itest/resources/basic.json diff --git a/spark/core/itest/resources/simple.json b/spark/core/src/itest/resources/simple.json similarity index 100% rename from spark/core/itest/resources/simple.json rename to spark/core/src/itest/resources/simple.json diff --git a/spark/core/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSpark.scala b/spark/core/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSpark.scala similarity index 100% rename from spark/core/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSpark.scala rename to 
spark/core/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSpark.scala diff --git a/spark/core/itest/scala/org/elasticsearch/spark/integration/SparkScalaSuite.java b/spark/core/src/itest/scala/org/elasticsearch/spark/integration/SparkScalaSuite.java similarity index 100% rename from spark/core/itest/scala/org/elasticsearch/spark/integration/SparkScalaSuite.java rename to spark/core/src/itest/scala/org/elasticsearch/spark/integration/SparkScalaSuite.java diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/Bean.java b/spark/core/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/Bean.java rename to spark/core/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/cfg/SparkSettings.java b/spark/core/src/main/scala/org/elasticsearch/spark/cfg/SparkSettings.java similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/cfg/SparkSettings.java rename to spark/core/src/main/scala/org/elasticsearch/spark/cfg/SparkSettings.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/cfg/SparkSettingsManager.java b/spark/core/src/main/scala/org/elasticsearch/spark/cfg/SparkSettingsManager.java similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/cfg/SparkSettingsManager.java rename to spark/core/src/main/scala/org/elasticsearch/spark/cfg/SparkSettingsManager.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/cfg/package-info.java b/spark/core/src/main/scala/org/elasticsearch/spark/cfg/package-info.java similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/cfg/package-info.java rename to spark/core/src/main/scala/org/elasticsearch/spark/cfg/package-info.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/package.scala b/spark/core/src/main/scala/org/elasticsearch/spark/package.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/package.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/package.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDD.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDD.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDD.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDD.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDDIterator.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDDIterator.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDDIterator.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/AbstractEsRDDIterator.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java similarity index 69% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java index 9fddeb50a..7b9dfb397 100644 --- a/spark/core/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java +++ b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/CompatUtils.java @@ -55,28 +55,37 @@ static void checkSparkLibraryCompatibility(boolean throwOnIncompatible) { boolean isSpark13Level = 
ObjectUtils.isClassPresent("org.apache.spark.sql.DataFrame", SparkConf.class.getClassLoader()); boolean isSpark20Level = ObjectUtils.isClassPresent("org.apache.spark.sql.streaming.StreamingQuery", SparkConf.class.getClassLoader()); - CompatibilityLevel compatibilityLevel = ObjectUtils.instantiate("org.elasticsearch.spark.sql.SparkSQLCompatibilityLevel", CompatUtils.class.getClassLoader()); - - boolean isEshForSpark20 = "20".equals(compatibilityLevel.versionId()); - String esSupportedSparkVersion = compatibilityLevel.versionDescription(); - - String errorMessage = null; - - if (!(isSpark13Level || isSpark20Level)) { - String sparkVersion = getSparkVersionOr("1.0-1.2"); - errorMessage = String.format("Incorrect classpath detected; Elasticsearch Spark compiled for Spark %s but used with unsupported Spark version %s", - esSupportedSparkVersion, sparkVersion); - } else if (isSpark20Level != isEshForSpark20) { // XOR can be applied as well but != increases readability - String sparkVersion = getSparkVersionOr(isSpark13Level ? "1.3-1.6" : "2.0+"); - errorMessage = String.format("Incorrect classpath detected; Elasticsearch Spark compiled for Spark %s but used with Spark %s", - esSupportedSparkVersion, sparkVersion); - } + try { + CompatibilityLevel compatibilityLevel = ObjectUtils.instantiate("org.elasticsearch.spark.sql.SparkSQLCompatibilityLevel", CompatUtils.class.getClassLoader()); + boolean isEshForSpark20 = "20".equals(compatibilityLevel.versionId()); + String esSupportedSparkVersion = compatibilityLevel.versionDescription(); + + String errorMessage = null; + + if (!(isSpark13Level || isSpark20Level)) { + String sparkVersion = getSparkVersionOr("1.0-1.2"); + errorMessage = String.format("Incorrect classpath detected; Elasticsearch Spark compiled for Spark %s but used with unsupported Spark version %s", + esSupportedSparkVersion, sparkVersion); + } else if (isSpark20Level != isEshForSpark20) { // XOR can be applied as well but != increases readability + String sparkVersion = getSparkVersionOr(isSpark13Level ? "1.3-1.6" : "2.0+"); + errorMessage = String.format("Incorrect classpath detected; Elasticsearch Spark compiled for Spark %s but used with Spark %s", + esSupportedSparkVersion, sparkVersion); + } - if (errorMessage != null) { + if (errorMessage != null) { + if (throwOnIncompatible) { + throw new EsHadoopIllegalStateException(errorMessage); + } else { + LogFactory.getLog("org.elasticsearch.spark.rdd.EsSpark").warn(errorMessage); + } + } + } catch (EsHadoopIllegalStateException noClass) { + // In the event that someone is using the core jar without sql support, (like in our tests) this will be logged instead. 
+ String errorMessage = "Elasticsearch Spark SQL support could not be verified."; if (throwOnIncompatible) { - throw new EsHadoopIllegalStateException(errorMessage); + throw new EsHadoopIllegalStateException(errorMessage, noClass); } else { - LogFactory.getLog("org.elasticsearch.spark.rdd.EsSpark").warn(errorMessage); + LogFactory.getLog("org.elasticsearch.spark.rdd.EsSpark").info(errorMessage + " Continuing with core support."); } } } diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/CompatibilityLevel.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/CompatibilityLevel.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/CompatibilityLevel.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/CompatibilityLevel.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/EsRDDWriter.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/EsRDDWriter.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/EsRDDWriter.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/EsRDDWriter.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/EsSpark.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/EsSpark.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/EsSpark.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/EsSpark.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/JavaEsRDD.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/JavaEsRDD.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/JavaEsRDD.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/JavaEsRDD.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/Metadata.java b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/Metadata.java similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/Metadata.java rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/Metadata.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/ScalaEsRDD.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/ScalaEsRDD.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/ScalaEsRDD.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/ScalaEsRDD.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/api/java/JavaEsSpark.scala b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/api/java/JavaEsSpark.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/api/java/JavaEsSpark.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/api/java/JavaEsSpark.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/rdd/api/java/package-info.java b/spark/core/src/main/scala/org/elasticsearch/spark/rdd/api/java/package-info.java similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/rdd/api/java/package-info.java rename to spark/core/src/main/scala/org/elasticsearch/spark/rdd/api/java/package-info.java diff --git a/spark/core/main/scala/org/elasticsearch/spark/serialization/ReflectionUtils.scala b/spark/core/src/main/scala/org/elasticsearch/spark/serialization/ReflectionUtils.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/serialization/ReflectionUtils.scala rename to 
spark/core/src/main/scala/org/elasticsearch/spark/serialization/ReflectionUtils.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaMapFieldExtractor.scala b/spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaMapFieldExtractor.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaMapFieldExtractor.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaMapFieldExtractor.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaMetadataExtractor.scala b/spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaMetadataExtractor.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaMetadataExtractor.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaMetadataExtractor.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaValueReader.scala b/spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaValueReader.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaValueReader.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaValueReader.scala diff --git a/spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaValueWriter.scala b/spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaValueWriter.scala similarity index 100% rename from spark/core/main/scala/org/elasticsearch/spark/serialization/ScalaValueWriter.scala rename to spark/core/src/main/scala/org/elasticsearch/spark/serialization/ScalaValueWriter.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/ScalaExtendedBooleanValueReaderTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/ScalaExtendedBooleanValueReaderTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/ScalaExtendedBooleanValueReaderTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/ScalaExtendedBooleanValueReaderTest.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/ScalaValueReaderTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/ScalaValueReaderTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/ScalaValueReaderTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/ScalaValueReaderTest.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/cfg/SparkConfigTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/cfg/SparkConfigTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/cfg/SparkConfigTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/cfg/SparkConfigTest.scala diff --git a/spark/core/src/test/scala/org/elasticsearch/spark/serialization/Bean.java b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/Bean.java new file mode 100644 index 000000000..1992c35ed --- /dev/null +++ b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/Bean.java @@ -0,0 +1,51 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.spark.serialization; + +import java.io.Serializable; + +public class Bean implements Serializable { + + private String foo; + private Number id; + private boolean bool; + + public Bean() {} + + public Bean(String foo, Number bar, boolean bool) { + this.foo = foo; + this.id = bar; + this.bool = bool; + } + public String getFoo() { + return foo; + } + public void setFoo(String foo) { + this.foo = foo; + } + public Number getId() { + return id; + } + public void setBar(Number bar) { + this.id = bar; + } + public boolean isBool() { + return bool; + } +} diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/ScalaReflectionUtilsTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/ScalaReflectionUtilsTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/ScalaReflectionUtilsTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/serialization/ScalaReflectionUtilsTest.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/ScalaValueWriterTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/ScalaValueWriterTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/ScalaValueWriterTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/serialization/ScalaValueWriterTest.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/handler/write/imple/ScalaSerializationEventConverterTest.scala b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/handler/write/imple/ScalaSerializationEventConverterTest.scala similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/handler/write/imple/ScalaSerializationEventConverterTest.scala rename to spark/core/src/test/scala/org/elasticsearch/spark/serialization/handler/write/imple/ScalaSerializationEventConverterTest.scala diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/testbeans/Contact.java b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/testbeans/Contact.java similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/testbeans/Contact.java rename to spark/core/src/test/scala/org/elasticsearch/spark/serialization/testbeans/Contact.java diff --git a/spark/core/test/scala/org/elasticsearch/spark/serialization/testbeans/ContactBook.java b/spark/core/src/test/scala/org/elasticsearch/spark/serialization/testbeans/ContactBook.java similarity index 100% rename from spark/core/test/scala/org/elasticsearch/spark/serialization/testbeans/ContactBook.java rename to spark/core/src/test/scala/org/elasticsearch/spark/serialization/testbeans/ContactBook.java diff --git a/spark/sql-13/build.gradle b/spark/sql-13/build.gradle index 8b3eac6bf..a2d61238b 100644 --- a/spark/sql-13/build.gradle +++ b/spark/sql-13/build.gradle @@ -1,225 +1,189 @@ +import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin description = "Elasticsearch Spark (for Spark 1.3-1.6)" 
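// Consumer-side sketch (illustrative assumption, not taken from this patch): with the
// 'org.elasticsearch.spark.sql.variant' capability group and the spark13scala210 feature
// variant registered in the sparkVariants block below, a downstream Gradle build could opt
// into the non-default Scala 2.10 build through Gradle capability resolution. The module
// coordinates and version 'org.elasticsearch:elasticsearch-spark-13:X.Y.Z' are placeholders.
dependencies {
    implementation('org.elasticsearch:elasticsearch-spark-13:X.Y.Z') {
        capabilities {
            // Requests the variant published for Spark 1.3 / Scala 2.10 instead of the default.
            requireCapability('org.elasticsearch.spark.sql.variant:spark13scala210')
        }
    }
}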
-evaluationDependsOn(':elasticsearch-hadoop-mr') - apply plugin: 'java-library' apply plugin: 'scala' apply plugin: 'es.hadoop.build' -apply plugin: 'scala.variants' - -variants { - defaultVersion '2.11.12' - targetVersions '2.10.7', '2.11.12' -} - -println "Compiled using Scala ${project.ext.scalaMajorVersion} [${project.ext.scalaVersion}]" -String sparkVersion = spark13Version - -configurations { - embedded { - transitive = false - canBeResolved = true - } - implementation { - extendsFrom project.configurations.embedded - } - scalaCompilerPlugin { - defaultDependencies { dependencies -> - if (project.ext.scalaMajorVersion != '2.10') { - dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${scalaVersion}:0.13")) +apply plugin: 'spark.variants' + +sparkVariants { + capabilityGroup 'org.elasticsearch.spark.sql.variant' + setDefaultVariant "spark13scala211", spark13Version, scala211Version + addFeatureVariant "spark13scala210", spark13Version, scala210Version + + all { SparkVariantPlugin.SparkVariant variant -> + String scalaCompileTaskName = project.sourceSets + .getByName(variant.getSourceSetName('main')) + .getCompileTaskName('scala') + + project.configurations { + create(variant.configuration('embedded')) { + transitive = false + canBeResolved = true + } + getByName(variant.configuration('implementation')) { + extendsFrom project.configurations.getByName(variant.configuration('embedded')) + } + getByName(variant.configuration('test', 'implementation')) { + exclude group: "org.mortbay.jetty" + } + getByName(variant.configuration('itest', 'implementation')) { + exclude group: "org.mortbay.jetty" } } - } - testImplementation { - exclude group: "org.mortbay.jetty" - } - itestImplementation { - exclude group: "org.mortbay.jetty" - } -} -// deal with the messy conflicts out there -configurations.all { Configuration conf -> - conf.resolutionStrategy { - eachDependency { details -> - // in a similar vein, change all javax.servlet artifacts to the one used by Spark - // otherwise these will lead to SecurityException (signer information wrong) - if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) { - details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016" + project.getTasks().getByName(scalaCompileTaskName) { ScalaCompile compileScala -> + configure(compileScala.scalaCompileOptions.forkOptions) { + memoryMaximumSize = '1g' + jvmArgs = ['-XX:MaxPermSize=512m'] } + compileScala.scalaCompileOptions.additionalParameters = [ + "-feature", + "-unchecked", + "-deprecation", + "-Xfuture", + "-Yno-adapted-args", + "-Ywarn-dead-code", + "-Ywarn-numeric-widen", + "-Xfatal-warnings" + ] } - } -} - -tasks.withType(ScalaCompile) { ScalaCompile task -> - task.sourceCompatibility = project.ext.minimumRuntimeVersion - task.targetCompatibility = project.ext.minimumRuntimeVersion - task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath -} - -compileScala { - configure(scalaCompileOptions.forkOptions) { - memoryMaximumSize = '1g' - jvmArgs = ['-XX:MaxPermSize=512m'] - } - scalaCompileOptions.additionalParameters = [ - "-feature", - "-unchecked", - "-deprecation", - "-Xfuture", - "-Yno-adapted-args", - "-Ywarn-dead-code", - "-Ywarn-numeric-widen", - "-Xfatal-warnings" - ] -} -String coreSrc = file("$projectDir/../core").absolutePath.replace('\\','/') - -sourceSets { - main.scala.srcDirs += "$coreSrc/main/scala" - test.scala.srcDirs += 
"$coreSrc/test/scala" - itest.java.srcDirs += "$coreSrc/itest/java" - itest.scala.srcDirs += "$coreSrc/itest/scala" - itest.resources.srcDirs += "$coreSrc/itest/resources" -} - -def javaFilesOnly = { FileTreeElement spec -> - spec.file.name.endsWith('.java') || spec.isDirectory() -} + dependencies { + add(variant.configuration('embedded'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('embedded'), project(":elasticsearch-spark")) { + capabilities { + requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version") + } + } -artifacts { - sourceElements(project.file("$coreSrc/main/scala")) - // Add java files from core source to javadocElements. - project.fileTree("$coreSrc/main/scala").include(javaFilesOnly).each { - javadocElements(it) - } - project.fileTree("src/main/scala").include(javaFilesOnly).each { - javadocElements(it) - } -} + add(variant.configuration('api'), "org.scala-lang:scala-library:${variant.scalaVersion}") + add(variant.configuration('api'), "org.scala-lang:scala-reflect:${variant.scalaVersion}") + add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } -// currently the outside project folders are transformed into linked resources however -// Gradle only supports one so the project will be invalid as not all sources will be in there -// as such, they are setup here manually for Eclipse. IntelliJ probably needs a similar approach -eclipse { - project.file.whenMerged { pj -> - // eliminated resources created by gradle + add(variant.configuration('implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('implementation'), "commons-logging:commons-logging:1.1.1") + add(variant.configuration('implementation'), "javax.xml.bind:jaxb-api:2.3.1") + add(variant.configuration('implementation'), "org.apache.spark:spark-catalyst_${variant.scalaMajorVersion}:$variant.sparkVersion") + + // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime + add(variant.configuration('compileOnly'), "com.fasterxml.jackson.core:jackson-annotations:2.6.7") + add(variant.configuration('compileOnly'), "com.google.guava:guava:16.0.1") + add(variant.configuration('compileOnly'), "org.json4s:json4s-jackson_${variant.scalaMajorVersion}:3.2.11") + add(variant.configuration('compileOnly'), "org.slf4j:slf4j-api:1.7.6") + + if ('2.10'.equals(scalaMajorVersion)) { + add(variant.configuration('implementation'), "org.apache.spark:spark-unsafe_${variant.scalaMajorVersion}:$variant.sparkVersion") + add(variant.configuration('implementation'), "org.apache.avro:avro:1.7.7") + add(variant.configuration('implementation'), "log4j:log4j:1.2.17") + add(variant.configuration('implementation'), "com.google.code.findbugs:jsr305:2.0.1") + add(variant.configuration('implementation'), "org.json4s:json4s-ast_2.10:3.2.10") + add(variant.configuration('implementation'), "com.esotericsoftware.kryo:kryo:2.21") + add(variant.configuration('compileOnly'), "org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}") + add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}") + 
add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}") + } - linkedResources.clear() - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/main/scala", "2", "$coreSrc/main/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/test/scala", "2", "$coreSrc/test/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/java", "2", "$coreSrc/itest/java", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/scala", "2", "$coreSrc/itest/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/resources","2", "$coreSrc/itest/resources", null)) + add(variant.configuration('test', 'implementation'), project(":test:shared")) + add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } - } - classpath.file { - whenMerged { cp -> - entries.removeAll { entry -> - entry.kind == 'src' && (entry.path in ["scala", "java", "resources"] || entry.path.startsWith("itest-") || entry.path.endsWith("-scala")) + add(variant.configuration('itest', 'implementation'), project(":test:shared")) + add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' } - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/main/scala", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/test/scala", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/java", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/scala", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/resources", null)) + add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr")) } - } -} - -dependencies { - embedded(project(":elasticsearch-hadoop-mr")) - api("org.scala-lang:scala-library:${project.ext.scalaVersion}") - api("org.scala-lang:scala-reflect:${project.ext.scalaVersion}") - api("org.apache.spark:spark-core_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'javax.servlet' - exclude group: 'org.apache.hadoop' - } - - implementation("org.apache.spark:spark-sql_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - implementation("org.apache.spark:spark-streaming_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - implementation("commons-logging:commons-logging:1.1.1") - implementation("javax.xml.bind:jaxb-api:2.3.1") - implementation("org.apache.spark:spark-catalyst_${project.ext.scalaMajorVersion}:$sparkVersion") - - // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime - compileOnly("com.fasterxml.jackson.core:jackson-annotations:2.6.7") - compileOnly("com.google.guava:guava:16.0.1") - compileOnly("org.json4s:json4s-jackson_${project.ext.scalaMajorVersion}:3.2.11") - 
compileOnly("org.slf4j:slf4j-api:1.7.6") - - if ('2.10'.equals(scalaMajorVersion)) { - implementation("org.apache.spark:spark-unsafe_${project.ext.scalaMajorVersion}:$sparkVersion") - implementation("org.apache.avro:avro:1.7.7") - implementation("log4j:log4j:1.2.17") - implementation("com.google.code.findbugs:jsr305:2.0.1") - implementation("org.json4s:json4s-ast_2.10:3.2.10") - implementation("com.esotericsoftware.kryo:kryo:2.21") - compileOnly("org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}") - compileOnly("org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}") - compileOnly("org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}") - } + def javaFilesOnly = { FileTreeElement spec -> + spec.file.name.endsWith('.java') || spec.isDirectory() + } - testImplementation(project(":test:shared")) - testImplementation("org.apache.spark:spark-core_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'javax.servlet' - exclude group: 'org.apache.hadoop' - } - testImplementation("org.apache.spark:spark-sql_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } + // Add java files from scala source set to javadocSourceElements. + project.fileTree("src/main/scala").include(javaFilesOnly).each { + project.artifacts.add(variant.configuration('javadocSourceElements'), it) + } - itestImplementation(project(":test:shared")) - itestImplementation("org.apache.spark:spark-streaming_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } + if (variant.scalaMajorVersion != '2.10') { + // Configure java source generation for javadoc purposes + String generatedJavaDirectory = "$buildDir/generated/java/${variant.name}" + Configuration scalaCompilerPlugin = project.configurations.maybeCreate(variant.configuration('scalaCompilerPlugin')) + scalaCompilerPlugin.defaultDependencies { dependencies -> + dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${variant.scalaVersion}:0.13")) + } - additionalSources(project(":elasticsearch-hadoop-mr")) - javadocSources(project(":elasticsearch-hadoop-mr")) -} + ScalaCompile compileScala = tasks.getByName(scalaCompileTaskName) as ScalaCompile + compileScala.scalaCompileOptions.with { + additionalParameters = [ + "-Xplugin:" + configurations.scalaCompilerPlugin.asPath, + "-P:genjavadoc:out=$buildDir/generated/java".toString() + ] + } -// Export generated Java code from the genjavadoc compiler plugin -artifacts { - javadocElements(project.file("$buildDir/generated/java")) { - builtBy compileScala - } -} + // Export generated Java code from the genjavadoc compiler plugin + artifacts { + add(variant.configuration('javadocSourceElements'), project.file(generatedJavaDirectory)) { + builtBy compileScala + } + } + tasks.getByName(variant.taskName('javadoc')) { + dependsOn compileScala + source(generatedJavaDirectory) + } + } -jar { - dependsOn(project.configurations.embedded) - from(project.configurations.embedded.collect { it.isDirectory() ? 
it : zipTree(it)}) { - include "org/elasticsearch/hadoop/**" - include "esh-build.properties" - include "META-INF/services/*" + scaladoc { + title = "${rootProject.description} ${version} API" + } } } -javadoc { - if (project.ext.scalaMajorVersion != '2.10') { - dependsOn compileScala - source += "$buildDir/generated/java" +// deal with the messy conflicts out there +configurations.matching { it.name.contains('CompilerPlugin') == false }.all { Configuration conf -> + conf.resolutionStrategy { + eachDependency { details -> + // in a similar vein, change all javax.servlet artifacts to the one used by Spark + // otherwise these will lead to SecurityException (signer information wrong) + if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) { + details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016" + } + } } } -scaladoc { - title = "${rootProject.description} ${version} API" +tasks.withType(ScalaCompile) { ScalaCompile task -> + task.sourceCompatibility = project.ext.minimumRuntimeVersion + task.targetCompatibility = project.ext.minimumRuntimeVersion + task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath } -tasks.withType(ScalaCompile) { - if (project.ext.scalaMajorVersion != '2.10') { - scalaCompileOptions.with { - additionalParameters = [ - "-Xplugin:" + configurations.scalaCompilerPlugin.asPath, - "-P:genjavadoc:out=$buildDir/generated/java".toString() - ] +// Embed the embedded dependencies in the final jar after all configuration is complete +sparkVariants { + all { SparkVariantPlugin.SparkVariant variant -> + tasks.getByName(variant.taskName('jar')) { + dependsOn(project.configurations.getByName(variant.configuration('embedded'))) + // TODO: Is there a way to do this lazily? This looks like it resolves the configuration. + from(project.configurations.getByName(variant.configuration('embedded')).collect { it.isDirectory() ? it : zipTree(it)}) { + include "org/elasticsearch/**" + include "esh-build.properties" + include "META-INF/services/*" + } } } } diff --git a/spark/sql-13/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java b/spark/sql-13/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java new file mode 100644 index 000000000..b49e49b80 --- /dev/null +++ b/spark/sql-13/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java @@ -0,0 +1,40 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.spark.integration; + +import com.esotericsoftware.kryo.Kryo; +import org.apache.spark.SparkConf; +import org.elasticsearch.hadoop.Provisioner; +import org.elasticsearch.hadoop.util.ReflectionUtils; + +import java.lang.reflect.Constructor; + +public abstract class SparkUtils { + + public static final String[] ES_SPARK_TESTING_JAR = new String[] {Provisioner.ESHADOOP_TESTING_JAR}; + + public static Kryo sparkSerializer(SparkConf conf) throws Exception { + // reflection galore + Class ks = Class.forName("org.apache.spark.serializer.KryoSerializer", true, conf.getClass().getClassLoader()); + Constructor ctr = ks.getDeclaredConstructor(SparkConf.class); + Object ksInstance = ctr.newInstance(conf); + Kryo kryo = ReflectionUtils.invoke(ReflectionUtils.findMethod(ks, "newKryo"), ksInstance); + return kryo; + } +} diff --git a/spark/sql-13/src/itest/resources/basic.json b/spark/sql-13/src/itest/resources/basic.json new file mode 100644 index 000000000..c4538e9c4 --- /dev/null +++ b/spark/sql-13/src/itest/resources/basic.json @@ -0,0 +1 @@ +{ "firstName": "John", "isAlive": true, "age": 25, "children": ["Alex", "Joe"], "address": { "streetAddress": "21 2nd Street" } } diff --git a/spark/sql-13/src/itest/resources/simple.json b/spark/sql-13/src/itest/resources/simple.json new file mode 100644 index 000000000..715b02d41 --- /dev/null +++ b/spark/sql-13/src/itest/resources/simple.json @@ -0,0 +1 @@ +{"key":"value"} \ No newline at end of file diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala index 3a12b6f04..e5dc64eee 100644 --- a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala @@ -37,7 +37,7 @@ import org.elasticsearch.hadoop.util.TestUtils.docEndpoint import org.elasticsearch.hadoop.util.{EsMajorVersion, StringUtils, TestSettings} import org.elasticsearch.spark.rdd.EsSpark import org.elasticsearch.spark.rdd.Metadata._ -import org.elasticsearch.spark.serialization.{Bean, ReflectionUtils} +import org.elasticsearch.spark.serialization.{Bean, Garbage, ModuleCaseClass, ReflectionUtils, Trip} import org.elasticsearch.spark.streaming._ import org.hamcrest.Matchers._ import org.junit.Assert._ @@ -170,7 +170,7 @@ class AbstractScalaEsScalaSparkStreaming(val prefix: String, readMetadata: jl.Bo def testEsRDDWriteCaseClass(): Unit = { val javaBean = new Bean("bar", 1, true) val caseClass1 = Trip("OTP", "SFO") - val caseClass2 = AbstractScalaEsScalaSpark.ModuleCaseClass(1, "OTP", "MUC") + val caseClass2 = ModuleCaseClass(1, "OTP", "MUC") val vals = ReflectionUtils.caseClassValues(caseClass2) diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java new file mode 100644 index 000000000..1992c35ed --- /dev/null +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java @@ -0,0 +1,51 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.spark.serialization; + +import java.io.Serializable; + +public class Bean implements Serializable { + + private String foo; + private Number id; + private boolean bool; + + public Bean() {} + + public Bean(String foo, Number bar, boolean bool) { + this.foo = foo; + this.id = bar; + this.bool = bool; + } + public String getFoo() { + return foo; + } + public void setFoo(String foo) { + this.foo = foo; + } + public Number getId() { + return id; + } + public void setBar(Number bar) { + this.id = bar; + } + public boolean isBool() { + return bool; + } +} diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala new file mode 100644 index 000000000..7d97118c5 --- /dev/null +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +class Garbage(i: Int) { + def doNothing(): Unit = () +} diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala new file mode 100644 index 000000000..ed2b88897 --- /dev/null +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +case class ModuleCaseClass(id: Integer, departure: String, var arrival: String) { + var l = math.Pi +} diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala new file mode 100644 index 000000000..dfa879eb9 --- /dev/null +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +case class Trip(departure: String, arrival: String) { + var extra = math.Pi +} diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala new file mode 100644 index 000000000..29177f6b2 --- /dev/null +++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala @@ -0,0 +1,39 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.spark.sql + +import java.util.ServiceLoader + +import org.apache.spark.sql.sources.DataSourceRegister +import org.junit.{Assert, Test} + +import scala.collection.JavaConverters._ + +class ServiceLoadingTest { + + @Test + def serviceLoadingTest(): Unit = { + val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], Thread.currentThread().getContextClassLoader) + if (serviceLoader.asScala.map(_.shortName()).exists(_.equals("es")) == false) { + Assert.fail("Cannot locate 'es' data source") + } + } + +} diff --git a/spark/sql-20/build.gradle b/spark/sql-20/build.gradle index ed802ec2e..9e1ddbd2b 100644 --- a/spark/sql-20/build.gradle +++ b/spark/sql-20/build.gradle @@ -1,221 +1,206 @@ +import org.elasticsearch.hadoop.gradle.scala.SparkVariantPlugin description = "Elasticsearch Spark (for Spark 2.X)" -evaluationDependsOn(':elasticsearch-hadoop-mr') - apply plugin: 'java-library' apply plugin: 'scala' apply plugin: 'es.hadoop.build.integration' -apply plugin: 'scala.variants' - -variants { - defaultVersion '2.11.12' - targetVersions '2.10.7', '2.11.12' -} - -configurations { - embedded { - transitive = false - canBeResolved = true - } - implementation { - extendsFrom project.configurations.embedded - } - scalaCompilerPlugin { - defaultDependencies { dependencies -> - if (project.ext.scalaMajorVersion != '2.10') { - dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${scalaVersion}:0.13")) +apply plugin: 'spark.variants' + +sparkVariants { + capabilityGroup 'org.elasticsearch.spark.sql.variant' + setDefaultVariant "spark20scala211", spark24Version, scala211Version + addFeatureVariant "spark20scala210", spark22Version, scala210Version + + all { SparkVariantPlugin.SparkVariant variant -> + String scalaCompileTaskName = project.sourceSets + .getByName(variant.getSourceSetName("main")) + .getCompileTaskName("scala") + + project.configurations { + create(variant.configuration('embedded')) { + transitive = false + canBeResolved = true + } + getByName(variant.configuration('implementation')) { + extendsFrom project.configurations.getByName(variant.configuration('embedded')) } } - } -} - -println "Compiled using Scala ${project.ext.scalaMajorVersion} [${project.ext.scalaVersion}]" -String sparkVersion = spark20Version -// Revert to spark 2.2.0 for scala 2.10 as 2.3+ does not support scala 2.10 -if (project.ext.scalaMajorVersion == '2.10') { - sparkVersion = '2.2.0' -} - -tasks.withType(ScalaCompile) { ScalaCompile task -> - task.sourceCompatibility = project.ext.minimumRuntimeVersion - task.targetCompatibility = project.ext.minimumRuntimeVersion - task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath -} + // Configure main compile task + project.getTasks().getByName(scalaCompileTaskName) { ScalaCompile compileScala -> + configure(compileScala.scalaCompileOptions.forkOptions) { + memoryMaximumSize = '1g' + jvmArgs = ['-XX:MaxPermSize=512m'] + } + compileScala.scalaCompileOptions.additionalParameters = [ + "-feature", + "-unchecked", + "-deprecation", + "-Xfuture", + "-Yno-adapted-args", + "-Ywarn-dead-code", + "-Ywarn-numeric-widen", + "-Xfatal-warnings" + ] + } -compileScala { - configure(scalaCompileOptions.forkOptions) { - memoryMaximumSize = '1g' - jvmArgs = ['-XX:MaxPermSize=512m'] - } - scalaCompileOptions.additionalParameters = [ - "-feature", - "-unchecked", - "-deprecation", - "-Xfuture", - "-Yno-adapted-args", - "-Ywarn-dead-code", - "-Ywarn-numeric-widen", - 
"-Xfatal-warnings" - ] -} + dependencies { + add(variant.configuration('embedded'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('embedded'), project(":elasticsearch-spark")) { + capabilities { + requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version") + } + } -String coreSrc = file("$projectDir/../core").absolutePath.replace('\\','/') + add(variant.configuration('api'), "org.scala-lang:scala-library:$variant.scalaVersion") + add(variant.configuration('api'), "org.scala-lang:scala-reflect:$variant.scalaVersion") + add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } -sourceSets { - main.scala.srcDirs += "$coreSrc/main/scala" - test.scala.srcDirs += "$coreSrc/test/scala" - itest.java.srcDirs += "$coreSrc/itest/java" - itest.scala.srcDirs += "$coreSrc/itest/scala" - itest.resources.srcDirs += "$coreSrc/itest/resources" -} + add(variant.configuration('implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('implementation'), "org.slf4j:slf4j-api:1.7.6") { + because 'spark exposes slf4j components in traits that we need to extend' + } + add(variant.configuration('implementation'), "commons-logging:commons-logging:1.1.1") + add(variant.configuration('implementation'), "javax.xml.bind:jaxb-api:2.3.1") + add(variant.configuration('implementation'), "com.google.protobuf:protobuf-java:2.5.0") + add(variant.configuration('implementation'), "org.apache.spark:spark-catalyst_${variant.scalaMajorVersion}:$variant.sparkVersion") + add(variant.configuration('implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } -def javaFilesOnly = { FileTreeElement spec -> - spec.file.name.endsWith('.java') || spec.isDirectory() -} + // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime + add(variant.configuration('compileOnly'), "com.fasterxml.jackson.core:jackson-annotations:2.6.7") + add(variant.configuration('compileOnly'), "org.json4s:json4s-jackson_${variant.scalaMajorVersion}:3.2.11") + add(variant.configuration('compileOnly'), "org.apache.spark:spark-tags_${variant.scalaMajorVersion}:$variant.sparkVersion") + + if ('2.10' == scalaMajorVersion) { + add(variant.configuration('implementation'), "org.apache.spark:spark-unsafe_${variant.scalaMajorVersion}:$variant.sparkVersion") + add(variant.configuration('implementation'), "org.apache.avro:avro:1.7.7") + add(variant.configuration('implementation'), "log4j:log4j:1.2.17") + add(variant.configuration('implementation'), "com.google.code.findbugs:jsr305:2.0.1") + add(variant.configuration('implementation'), "org.json4s:json4s-ast_2.10:3.2.10") + add(variant.configuration('implementation'), "com.esotericsoftware.kryo:kryo:2.21") + add(variant.configuration('compileOnly'), "org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}") + add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}") + add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}") + } 
-artifacts { - sourceElements(project.file("$coreSrc/main/scala")) - // Add java files from core source to javadocElements. - project.fileTree("$coreSrc/main/scala").include(javaFilesOnly).each { - javadocElements(it) - } - project.fileTree("src/main/scala").include(javaFilesOnly).each { - javadocElements(it) - } -} + add(variant.configuration('test', 'implementation'), project(":test:shared")) + add(variant.configuration('test', 'implementation'), "org.elasticsearch:securemock:1.2") + add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'javax.servlet' + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-sql_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } -// currently the outside project folders are transformed into linked resources however -// Gradle only supports one so the project will be invalid as not all sources will be in there -// as such, they are setup here manually for Eclipse. IntelliJ probably needs a similar approach -eclipse { - project.file.whenMerged { pj -> - // eliminated resources created by gradle + add(variant.configuration('itest', 'implementation'), project(":test:shared")) + add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-yarn_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } + add(variant.configuration('itest', 'implementation'), "org.apache.spark:spark-streaming_${variant.scalaMajorVersion}:$variant.sparkVersion") { + exclude group: 'org.apache.hadoop' + } - linkedResources.clear() - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/main/scala", "2", "$coreSrc/main/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/test/scala", "2", "$coreSrc/test/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/java", "2", "$coreSrc/itest/java", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/scala", "2", "$coreSrc/itest/scala", null)) - linkedResources.add(new org.gradle.plugins.ide.eclipse.model.Link("core/itest/resources","2", "$coreSrc/itest/resources", null)) + add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr")) + add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr")) - } - classpath.file { - whenMerged { cp -> - entries.removeAll { entry -> - entry.kind == 'src' && (entry.path in ["scala", "java", "resources"] || entry.path.startsWith("itest-") || entry.path.endsWith("-scala")) + add(variant.configuration('additionalSources'), project(":elasticsearch-spark")) { + capabilities { + requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version") + } + } + add(variant.configuration('javadocSources'), project(":elasticsearch-spark")) { + capabilities { + requireCapability("org.elasticsearch.spark.variant:$variant.name:$project.version") + } } - - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/main/scala", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/test/scala", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/java", null)) - entries.add(new org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/scala", null)) - entries.add(new 
org.gradle.plugins.ide.eclipse.model.SourceFolder("core/itest/resources", null)) } - } -} -dependencies { - embedded(project(":elasticsearch-hadoop-mr")) - - api("org.scala-lang:scala-library:$scalaVersion") - api("org.scala-lang:scala-reflect:$scalaVersion") - api("org.apache.spark:spark-core_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'javax.servlet' - exclude group: 'org.apache.hadoop' - } - - implementation("org.apache.spark:spark-sql_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - implementation("org.apache.spark:spark-streaming_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - implementation("org.slf4j:slf4j-api:1.7.6") { - because 'spark exposes slf4j components in traits that we need to extend' - } - implementation("commons-logging:commons-logging:1.1.1") - implementation("javax.xml.bind:jaxb-api:2.3.1") - implementation("org.apache.spark:spark-catalyst_${project.ext.scalaMajorVersion}:$sparkVersion") - implementation("org.apache.spark:spark-yarn_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - - // Scala compiler needs these for arcane reasons, but they are not used in the api nor the runtime - compileOnly("com.fasterxml.jackson.core:jackson-annotations:2.6.7") - compileOnly("org.json4s:json4s-jackson_${project.ext.scalaMajorVersion}:3.2.11") - compileOnly("org.apache.spark:spark-tags_${project.ext.scalaMajorVersion}:$sparkVersion") - - if ('2.10' == scalaMajorVersion) { - implementation("org.apache.spark:spark-unsafe_${project.ext.scalaMajorVersion}:$sparkVersion") - implementation("org.apache.avro:avro:1.7.7") - implementation("log4j:log4j:1.2.17") - implementation("com.google.code.findbugs:jsr305:2.0.1") - implementation("org.json4s:json4s-ast_2.10:3.2.10") - implementation("com.esotericsoftware.kryo:kryo:2.21") - compileOnly("org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}") - compileOnly("org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}") - compileOnly("org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}") - } + def javaFilesOnly = { FileTreeElement spec -> + spec.file.name.endsWith('.java') || spec.isDirectory() + } - testImplementation(project(":test:shared")) - testImplementation(project.ext.hadoopClient) - testImplementation("org.elasticsearch:securemock:1.2") - testImplementation("org.apache.spark:spark-core_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'javax.servlet' - exclude group: 'org.apache.hadoop' - } - testImplementation("org.apache.spark:spark-sql_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - - itestImplementation(project(":test:shared")) - itestImplementation("org.apache.spark:spark-yarn_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } - itestImplementation("org.apache.spark:spark-streaming_${project.ext.scalaMajorVersion}:$sparkVersion") { - exclude group: 'org.apache.hadoop' - } + // Add java files from scala source set to javadocSourceElements. 
+ project.fileTree("src/main/scala").include(javaFilesOnly).each { + project.artifacts.add(variant.configuration('javadocSourceElements'), it) + } - additionalSources(project(":elasticsearch-hadoop-mr")) - javadocSources(project(":elasticsearch-hadoop-mr")) -} + if (variant.scalaMajorVersion != '2.10') { + // Configure java source generation for javadoc purposes + String generatedJavaDirectory = "$buildDir/generated/java/${variant.name}" + Configuration scalaCompilerPlugin = project.configurations.maybeCreate(variant.configuration('scalaCompilerPlugin')) + scalaCompilerPlugin.defaultDependencies { dependencies -> + dependencies.add(project.dependencies.create("com.typesafe.genjavadoc:genjavadoc-plugin_${variant.scalaVersion}:0.13")) + } -// Export generated Java code from the genjavadoc compiler plugin -artifacts { - javadocElements(project.file("$buildDir/generated/java")) { - builtBy compileScala - } -} + ScalaCompile compileScala = tasks.getByName(scalaCompileTaskName) as ScalaCompile + compileScala.scalaCompileOptions.with { + additionalParameters = [ + "-Xplugin:" + configurations.getByName(variant.configuration('scalaCompilerPlugin')).asPath, + "-P:genjavadoc:out=$generatedJavaDirectory".toString() + ] + } + // Export generated Java code from the genjavadoc compiler plugin + artifacts { + add(variant.configuration('javadocSourceElements'), project.file(generatedJavaDirectory)) { + builtBy compileScala + } + } + tasks.getByName(variant.taskName('javadoc')) { + dependsOn compileScala + source(generatedJavaDirectory) + } + } -jar { - dependsOn(project.configurations.embedded) - from(project.configurations.embedded.collect { it.isDirectory() ? it : zipTree(it)}) { - include "org/elasticsearch/hadoop/**" - include "esh-build.properties" - include "META-INF/services/*" + scaladoc { + title = "${rootProject.description} ${version} API" + } } } -javadoc { - if (project.ext.scalaMajorVersion != '2.10') { - dependsOn compileScala - source += "$buildDir/generated/java" +// deal with the messy conflicts out there +// Ignore the scalaCompilerPlugin configurations since it is immediately resolved to configure the scala compiler tasks +configurations.matching{ it.name.contains('CompilerPlugin') == false }.all { Configuration conf -> + conf.resolutionStrategy { + eachDependency { details -> + // change all javax.servlet artifacts to the one used by Spark otherwise these will lead to + // SecurityException (signer information wrong) + if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) { + details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016" + } + } } + conf.exclude group: "org.mortbay.jetty" } -scaladoc { - title = "${rootProject.description} ${version} API" +tasks.withType(ScalaCompile) { ScalaCompile task -> + task.sourceCompatibility = project.ext.minimumRuntimeVersion + task.targetCompatibility = project.ext.minimumRuntimeVersion + task.options.forkOptions.executable = new File(project.ext.runtimeJavaHome, 'bin/java').absolutePath } -tasks.withType(ScalaCompile) { - if (project.ext.scalaMajorVersion != '2.10') { - scalaCompileOptions.with { - additionalParameters = [ - "-Xplugin:" + configurations.scalaCompilerPlugin.asPath, - "-P:genjavadoc:out=$buildDir/generated/java".toString() - ] +// Embed the embedded dependencies in the final jar after all configuration is complete +sparkVariants { + all { SparkVariantPlugin.SparkVariant variant -> + tasks.getByName(variant.taskName('jar')) { + 
dependsOn(project.configurations.getByName(variant.configuration('embedded'))) + // TODO: Is there a way to do this lazily? This looks like it resolves the configuration. + from(project.configurations.getByName(variant.configuration('embedded')).collect { it.isDirectory() ? it : zipTree(it)}) { + include "org/elasticsearch/**" + include "esh-build.properties" + include "META-INF/services/*" + } } } } diff --git a/spark/sql-20/licenses/protobuf-java-2.5.0.jar.sha1 b/spark/sql-20/licenses/protobuf-java-2.5.0.jar.sha1 new file mode 100644 index 000000000..71f918819 --- /dev/null +++ b/spark/sql-20/licenses/protobuf-java-2.5.0.jar.sha1 @@ -0,0 +1 @@ +a10732c76bfacdbd633a7eb0f7968b1059a65dfa \ No newline at end of file diff --git a/spark/sql-20/licenses/protobuf-java-LICENSE.txt b/spark/sql-20/licenses/protobuf-java-LICENSE.txt new file mode 100644 index 000000000..97a6e3d19 --- /dev/null +++ b/spark/sql-20/licenses/protobuf-java-LICENSE.txt @@ -0,0 +1,32 @@ +Copyright 2008 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. 
\ No newline at end of file diff --git a/spark/sql-20/licenses/protobuf-java-NOTICE.txt b/spark/sql-20/licenses/protobuf-java-NOTICE.txt new file mode 100644 index 000000000..e69de29bb diff --git a/spark/sql-20/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 b/spark/sql-20/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 deleted file mode 100644 index d4d7e2e72..000000000 --- a/spark/sql-20/licenses/spark-catalyst_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -89b1654b97bfe35e466e7511fd5b11033c2bbbd9 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 b/spark/sql-20/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..c2a134ad3 --- /dev/null +++ b/spark/sql-20/licenses/spark-catalyst_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +ba9237eac7523f0d61e104bc6c35f01240020241 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-core_2.11-2.3.0.jar.sha1 b/spark/sql-20/licenses/spark-core_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 44379c147..000000000 --- a/spark/sql-20/licenses/spark-core_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -09e2bc021bd38b06da2e0a56fdd9d13935503d94 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-core_2.11-2.4.4.jar.sha1 b/spark/sql-20/licenses/spark-core_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..87bdd6969 --- /dev/null +++ b/spark/sql-20/licenses/spark-core_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +98226adb9bd1fb12479f5da1888e22b0fc89e1aa \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-sql_2.11-2.3.0.jar.sha1 b/spark/sql-20/licenses/spark-sql_2.11-2.3.0.jar.sha1 deleted file mode 100644 index e519c1764..000000000 --- a/spark/sql-20/licenses/spark-sql_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e19282137294c8a889917c05006931fbcd8d66d2 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-sql_2.11-2.4.4.jar.sha1 b/spark/sql-20/licenses/spark-sql_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..c5633efa7 --- /dev/null +++ b/spark/sql-20/licenses/spark-sql_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +e6a748c169978b0d070002f7849e4edc9bbb3db4 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-streaming_2.11-2.3.0.jar.sha1 b/spark/sql-20/licenses/spark-streaming_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 133766241..000000000 --- a/spark/sql-20/licenses/spark-streaming_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -57da1135f7192a2be85987f1708abf94887f7323 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-streaming_2.11-2.4.4.jar.sha1 b/spark/sql-20/licenses/spark-streaming_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..20790a140 --- /dev/null +++ b/spark/sql-20/licenses/spark-streaming_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +447fbcc8e6799e072a22e2fb404f6f0ea6d1f143 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-yarn_2.11-2.3.0.jar.sha1 b/spark/sql-20/licenses/spark-yarn_2.11-2.3.0.jar.sha1 deleted file mode 100644 index 1877f6a67..000000000 --- a/spark/sql-20/licenses/spark-yarn_2.11-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -dffb93bc7154b1242155585fd117425c40d70bd4 \ No newline at end of file diff --git a/spark/sql-20/licenses/spark-yarn_2.11-2.4.4.jar.sha1 b/spark/sql-20/licenses/spark-yarn_2.11-2.4.4.jar.sha1 new file mode 100644 index 000000000..743c65fa7 --- /dev/null +++ b/spark/sql-20/licenses/spark-yarn_2.11-2.4.4.jar.sha1 @@ -0,0 +1 @@ +cc2bd27ca1ae0368e06e243e652a5d4f62258121 \ No newline at end of file diff --git 
a/spark/sql-20/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java b/spark/sql-20/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java new file mode 100644 index 000000000..b49e49b80 --- /dev/null +++ b/spark/sql-20/src/itest/java/org/elasticsearch/spark/integration/SparkUtils.java @@ -0,0 +1,40 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.spark.integration; + +import com.esotericsoftware.kryo.Kryo; +import org.apache.spark.SparkConf; +import org.elasticsearch.hadoop.Provisioner; +import org.elasticsearch.hadoop.util.ReflectionUtils; + +import java.lang.reflect.Constructor; + +public abstract class SparkUtils { + + public static final String[] ES_SPARK_TESTING_JAR = new String[] {Provisioner.ESHADOOP_TESTING_JAR}; + + public static Kryo sparkSerializer(SparkConf conf) throws Exception { + // reflection galore + Class ks = Class.forName("org.apache.spark.serializer.KryoSerializer", true, conf.getClass().getClassLoader()); + Constructor ctr = ks.getDeclaredConstructor(SparkConf.class); + Object ksInstance = ctr.newInstance(conf); + Kryo kryo = ReflectionUtils.invoke(ReflectionUtils.findMethod(ks, "newKryo"), ksInstance); + return kryo; + } +} diff --git a/spark/sql-20/src/itest/resources/basic.json b/spark/sql-20/src/itest/resources/basic.json new file mode 100644 index 000000000..c4538e9c4 --- /dev/null +++ b/spark/sql-20/src/itest/resources/basic.json @@ -0,0 +1 @@ +{ "firstName": "John", "isAlive": true, "age": 25, "children": ["Alex", "Joe"], "address": { "streetAddress": "21 2nd Street" } } diff --git a/spark/sql-20/src/itest/resources/simple.json b/spark/sql-20/src/itest/resources/simple.json new file mode 100644 index 000000000..715b02d41 --- /dev/null +++ b/spark/sql-20/src/itest/resources/simple.json @@ -0,0 +1 @@ +{"key":"value"} \ No newline at end of file diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala index e2cde4b74..ff5d76880 100644 --- a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsScalaSparkStreaming.scala @@ -37,7 +37,7 @@ import org.elasticsearch.hadoop.util.TestUtils.docEndpoint import org.elasticsearch.hadoop.util.{EsMajorVersion, StringUtils, TestSettings} import org.elasticsearch.spark.rdd.EsSpark import org.elasticsearch.spark.rdd.Metadata._ -import org.elasticsearch.spark.serialization.{Bean, ReflectionUtils} +import org.elasticsearch.spark.serialization.{Bean, Garbage, ModuleCaseClass, ReflectionUtils, Trip} import 
org.elasticsearch.spark.streaming._ import org.hamcrest.Matchers._ import org.junit.Assert._ @@ -151,7 +151,7 @@ class AbstractScalaEsScalaSparkStreaming(val prefix: String, readMetadata: jl.Bo def testEsRDDWriteCaseClass(): Unit = { val javaBean = new Bean("bar", 1, true) val caseClass1 = Trip("OTP", "SFO") - val caseClass2 = AbstractScalaEsScalaSpark.ModuleCaseClass(1, "OTP", "MUC") + val caseClass2 = ModuleCaseClass(1, "OTP", "MUC") val vals = ReflectionUtils.caseClassValues(caseClass2) diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkStructuredStreaming.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkStructuredStreaming.scala index fd614b025..76eaaf69b 100644 --- a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkStructuredStreaming.scala +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkStructuredStreaming.scala @@ -25,8 +25,9 @@ import java.sql.Timestamp import java.util.concurrent.TimeUnit import java.{lang => jl} import java.{util => ju} -import javax.xml.bind.DatatypeConverter +import javax.xml.bind.DatatypeConverter +import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.SQLConf @@ -54,15 +55,9 @@ import org.elasticsearch.spark.sql.streaming.StreamingQueryTestHarness import org.hamcrest.Matchers.containsString import org.hamcrest.Matchers.is import org.hamcrest.Matchers.not -import org.junit.AfterClass -import org.junit.Assert +import org.junit.{AfterClass, Assert, Assume, BeforeClass, ClassRule, FixMethodOrder, Rule, Test} import org.junit.Assert.assertThat import org.junit.Assert.assertTrue -import org.junit.BeforeClass -import org.junit.ClassRule -import org.junit.FixMethodOrder -import org.junit.Rule -import org.junit.Test import org.junit.rules.TemporaryFolder import org.junit.runner.RunWith import org.junit.runners.MethodSorters @@ -474,12 +469,20 @@ class AbstractScalaEsSparkStructuredStreaming(prefix: String, something: Boolean val target = wrapIndex(resource("test-tech-{name}", "data", version)) val test = new StreamingQueryTestHarness[Record](spark) + // Spark passes the checkpoint name to Hadoop's Path class, which encodes the curly braces. + // The HDFS client doesn't seem to encode this path consistently. It creates the un-encoded + // file, encodes path name, then checks for the file existing, which fails because the name + // is different. + val checkpointName = checkpoint(target.replace("{", "").replace("}", "")) + Assume.assumeTrue("Checkpoint path is encoded improperly", + checkpointName.equals(new Path(checkpointName).toUri.toString)) + test.withInput(Record(1, "spark")) .withInput(Record(2, "hadoop")) .runTest { test.stream .writeStream - .option("checkpointLocation", checkpoint(target)) + .option("checkpointLocation", checkpointName) .format("es") .start(target) } diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java new file mode 100644 index 000000000..1992c35ed --- /dev/null +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Bean.java @@ -0,0 +1,51 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.spark.serialization; + +import java.io.Serializable; + +public class Bean implements Serializable { + + private String foo; + private Number id; + private boolean bool; + + public Bean() {} + + public Bean(String foo, Number bar, boolean bool) { + this.foo = foo; + this.id = bar; + this.bool = bool; + } + public String getFoo() { + return foo; + } + public void setFoo(String foo) { + this.foo = foo; + } + public Number getId() { + return id; + } + public void setBar(Number bar) { + this.id = bar; + } + public boolean isBool() { + return bool; + } +} diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala new file mode 100644 index 000000000..7d97118c5 --- /dev/null +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Garbage.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +class Garbage(i: Int) { + def doNothing(): Unit = () +} diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala new file mode 100644 index 000000000..ed2b88897 --- /dev/null +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/ModuleCaseClass.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +case class ModuleCaseClass(id: Integer, departure: String, var arrival: String) { + var l = math.Pi +} diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala new file mode 100644 index 000000000..dfa879eb9 --- /dev/null +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/serialization/Trip.scala @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.spark.serialization + +case class Trip(departure: String, arrival: String) { + var extra = math.Pi +} diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala new file mode 100644 index 000000000..29177f6b2 --- /dev/null +++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/sql/ServiceLoadingTest.scala @@ -0,0 +1,39 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.spark.sql + +import java.util.ServiceLoader + +import org.apache.spark.sql.sources.DataSourceRegister +import org.junit.{Assert, Test} + +import scala.collection.JavaConverters._ + +class ServiceLoadingTest { + + @Test + def serviceLoadingTest(): Unit = { + val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], Thread.currentThread().getContextClassLoader) + if (serviceLoader.asScala.map(_.shortName()).exists(_.equals("es")) == false) { + Assert.fail("Cannot locate 'es' data source") + } + } + +} diff --git a/storm/build.gradle b/storm/build.gradle index 3907161d4..6fc3a9cac 100644 --- a/storm/build.gradle +++ b/storm/build.gradle @@ -45,4 +45,19 @@ jar { } } +// add clojars repo to pom +publishing { + publications { + main { + getPom().withXml { XmlProvider xml -> + Node root = xml.asNode() + Node repositories = root.appendNode('repositories') + Node repository = repositories.appendNode('repository') + repository.appendNode('id', 'clojars.org') + repository.appendNode('url', 'https://clojars.org/repo') + } + } + } +} + tasks.getByName('integrationTest').enabled = false
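
A note on consuming the published variants (not part of the patch itself): given the capability group and variant names configured in the spark/sql-20 build above ('org.elasticsearch.spark.sql.variant', "spark20scala211", "spark20scala210"), a downstream Gradle build could, in principle, select a specific cross-compiled variant by requesting its capability on the dependency. The artifact coordinates and version below are illustrative assumptions only; the capability mechanism is standard Gradle (requireCapability inside a dependency's capabilities block).

dependencies {
    // Illustrative coordinates and version; the capability group and variant name mirror the
    // sparkVariants { capabilityGroup / addFeatureVariant } configuration shown in the patch.
    implementation("org.elasticsearch:elasticsearch-spark-20_2.10:7.10.0") {
        capabilities {
            // Request the Spark 2.2 / Scala 2.10 feature variant instead of the default variant
            requireCapability("org.elasticsearch.spark.sql.variant:spark20scala210:7.10.0")
        }
    }
}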