Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,7 @@ docker_image_default_repo_prefix=beam_

# supported flink versions
flink_versions=1.17,1.18,1.19,1.20,2.0
# supported spark versions
spark_versions=3
# supported python versions
python_versions=3.10,3.11,3.12,3.13,3.14
2 changes: 1 addition & 1 deletion runners/spark/3/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
def basePath = '..'
/* All properties required for loading the Spark build script */
project.ext {
spark_major = '3'
// Spark 3 version as defined in BeamModulePlugin
spark_version = spark3_version
spark_scala_version = '2.12'
copySourceBase = false // disabled to use Spark 3 as primary dev version
archives_base_name = 'beam-runners-spark-3'
}

Expand Down
112 changes: 88 additions & 24 deletions runners/spark/spark_runner.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -89,38 +89,102 @@ def hadoopVersions = [

hadoopVersions.each { kv -> configurations.create("hadoopVersion$kv.key") }

def sourceBase = "${project.projectDir}/../src"
def sourceBaseCopy = "${project.buildDir}/sourcebase/src"

def useCopiedSourceSet = { scope, type, trigger ->
def taskName = "copy${scope.capitalize()}${type.capitalize()}"
trigger.dependsOn tasks.register(taskName, Copy) {
from "$sourceBase/$scope/$type"
into "$sourceBaseCopy/$scope/$type"
duplicatesStrategy DuplicatesStrategy.INCLUDE
/*
* Per-version source overrides (mirrors runners/flink/flink_runner.gradle).
*
* Layout:
* runners/spark/src/ -- shared base (lowest supported version uses these directly)
* runners/spark/<major>/src/ -- version-specific overrides (a file from a later version takes precedence over the same file from an earlier one)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rephrase the comments a bit to make it plain descriptive (win -> takes precedence)

*
* The lowest supported `spark_major` builds straight from the shared base.
* Higher versions copy <shared> + <previous majors> + <current> into a single
* source-overrides directory using DuplicatesStrategy.INCLUDE so the current
* version's files override earlier ones.
*/
def base_path = ".."

/*
 * Builds the ordered list of source directories to merge for one source set.
 *
 *   versions -- version directory names to layer between the shared base and this module
 *   type     -- source set name used in the path ("main" or "test" at the call sites)
 *   group    -- directory below the source set, 'java' unless stated otherwise
 *
 * Result order: shared base, then each entry of `versions` in the given order, then this
 * module's own ./src directory. The Copy tasks consume the list in that order, so a
 * directory listed later replaces same-named files from a directory listed earlier.
 */
def overrides = { versions, type, group = 'java' ->
  def relativeDir = "src/${type}/${group}"
  def orderedDirs = ["${base_path}/${relativeDir}"]
  versions.each { major ->
    orderedDirs << "${base_path}/${major}/${relativeDir}"
  }
  orderedDirs << "./${relativeDir}"
  orderedDirs
}

// Ordered list of supported Spark majors, parsed from the comma-separated spark_versions
// property. Blank entries (e.g. from "3,,4" or a stray leading comma) are dropped: an empty
// entry would otherwise expand to the path "..//src/...", i.e. the shared base a second
// time, and that copy would replace the overrides of the majors listed before it.
def all_versions = spark_versions.split(",").collect { it.trim() }.findAll { it }
// Determine version order by list position rather than string comparison so two-digit
// majors (e.g. "10") still sort after single-digit ones.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a good catch, as a follow up we may fix the logic in flink runner gradle:

def previous_versions = all_versions.findAll { it < flink_major }

// Position of this module's major version within the ordered spark_versions list.
def spark_major_index = all_versions.indexOf(spark_major)
if (spark_major_index == -1) {
  // indexOf found no match: the module's spark_major is missing from spark_versions.
  def notListedMessage = "spark_major='${spark_major}' is not listed in spark_versions='${spark_versions}' " +
      "(see root gradle.properties)."
  throw new GradleException(notListedMessage)
}

// Every version listed before this one; stays empty when this is the first listed version.
def previous_versions = []
if (spark_major_index > 0) {
  previous_versions = all_versions.subList(0, spark_major_index)
}

// Ordered input directories for each of the four source sets (java and resources, for both
// main and test), as produced by the `overrides` closure.
def main_source_overrides = overrides(previous_versions, "main", "java")
def test_source_overrides = overrides(previous_versions, "test", "java")
def main_resources_overrides = overrides(previous_versions, "main", "resources")
def test_resources_overrides = overrides(previous_versions, "test", "resources")

// Root of the merged source tree inside the build directory; resolved eagerly because it is
// interpolated into the destination paths of the Copy tasks.
def sourceOverridesBase = project.layout.buildDirectory.get().dir('source-overrides/src')

def copySourceOverrides = tasks.register('copySourceOverrides', Copy) { copyTask ->
copyTask.from main_source_overrides
copyTask.into "${sourceOverridesBase}/main/java"
copyTask.duplicatesStrategy DuplicatesStrategy.INCLUDE
if (project.ext.has('excluded_files') && project.ext.excluded_files.containsKey('main')) {
project.ext.excluded_files.main.each { f -> copyTask.exclude "**/${f}" }
}
// append copied sources to srcDirs
sourceSets."$scope"."$type".srcDirs "$sourceBaseCopy/$scope/$type"
}

if (copySourceBase) {
// Copy source base into build directory.
// While this is not necessary, having multiple source sets referencing the same shared base will typically confuse an IDE and harm developer experience.
// The copySourceBase flag can be swapped without any implications and allows to pick a main version that is actively worked on.
useCopiedSourceSet("main", "java", compileJava)
useCopiedSourceSet("main", "resources", processResources)
useCopiedSourceSet("test", "java", compileTestJava)
useCopiedSourceSet("test", "resources", processTestResources)
// Merges the main resource directories into <sourceOverridesBase>/main/resources.
// DuplicatesStrategy.INCLUDE keeps the copy going when the same relative path appears in
// more than one input directory.
def copyResourcesOverrides = tasks.register('copyResourcesOverrides', Copy) { copyTask ->
  copyTask.from(main_resources_overrides)
  copyTask.into("${sourceOverridesBase}/main/resources")
  copyTask.duplicatesStrategy = DuplicatesStrategy.INCLUDE
}

// Merges the test Java source directories into <sourceOverridesBase>/test/java.
// DuplicatesStrategy.INCLUDE keeps the copy going when the same relative path appears in
// more than one input directory.
def copyTestSourceOverrides = tasks.register('copyTestSourceOverrides', Copy) { copyTask ->
  copyTask.from(test_source_overrides)
  copyTask.into("${sourceOverridesBase}/test/java")
  copyTask.duplicatesStrategy = DuplicatesStrategy.INCLUDE

  // Optional per-module exclusions: project.ext.excluded_files.test lists file names that
  // are matched in any directory and left out of the merged tree.
  def hasTestExclusions = project.ext.has('excluded_files') &&
      project.ext.excluded_files.containsKey('test')
  if (hasTestExclusions) {
    for (excludedName in project.ext.excluded_files.test) {
      copyTask.exclude("**/${excludedName}")
    }
  }
}

// Merges the test resource directories into <sourceOverridesBase>/test/resources.
// DuplicatesStrategy.INCLUDE keeps the copy going when the same relative path appears in
// more than one input directory.
def copyTestResourcesOverrides = tasks.register('copyTestResourcesOverrides', Copy) { copyTask ->
  copyTask.from(test_resources_overrides)
  copyTask.into("${sourceOverridesBase}/test/resources")
  copyTask.duplicatesStrategy = DuplicatesStrategy.INCLUDE
}

def use_override = (spark_major_index > 0)
def sourceBase = "${project.projectDir}/../src"

if (use_override) {
// Pin srcDirs to the Copy task providers so each higher version sees only its merged
// overrides tree. Passing the TaskProviders here lets Gradle auto-wire task dependencies
// for every consumer (compile, javadoc, sources jar, etc.) without manual dependsOn.
sourceSets {
main {
java { srcDirs = [copySourceOverrides] }
resources { srcDirs = [copyResourcesOverrides] }
}
test {
java { srcDirs = [copyTestSourceOverrides] }
resources { srcDirs = [copyTestResourcesOverrides] }
}
}
} else {
// append shared base sources to srcDirs
// Lowest supported Spark version: build straight from the shared base, no copy step.
sourceSets {
main {
java.srcDirs "${sourceBase}/main/java"
resources.srcDirs "${sourceBase}/main/resources"
java { srcDirs = ["${sourceBase}/main/java"] }
resources { srcDirs = ["${sourceBase}/main/resources"] }
}
test {
java.srcDirs "${sourceBase}/test/java"
resources.srcDirs "${sourceBase}/test/resources"
java { srcDirs = ["${sourceBase}/test/java"] }
resources { srcDirs = ["${sourceBase}/test/resources"] }
Comment thread
tkaymak marked this conversation as resolved.
}
}
}
Expand Down
Loading