From 8b8bdeec1969305f51431b54bf2baa22f3dfa7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JB=20Onofr=C3=A9?= Date: Wed, 13 Dec 2023 15:19:39 +0100 Subject: [PATCH] GH-39214: [Java] Support reproducible build --- cpp/submodules/parquet-testing | 2 +- dev/release/01-prepare-test.rb | 21 ++++---- dev/release/post-11-bump-versions-test.rb | 53 ++++++------------- dev/release/test-helper.rb | 47 +++++++++++++++- java/adapter/avro/pom.xml | 4 ++ java/adapter/jdbc/pom.xml | 4 ++ java/algorithm/pom.xml | 4 ++ java/bom/pom.xml | 3 +- java/c/pom.xml | 1 + java/compression/pom.xml | 4 ++ java/dataset/pom.xml | 1 + java/flight/flight-core/pom.xml | 3 +- java/flight/flight-grpc/pom.xml | 1 + java/flight/flight-integration-tests/pom.xml | 6 ++- java/flight/flight-sql-jdbc-core/pom.xml | 1 + java/flight/flight-sql-jdbc-driver/pom.xml | 4 ++ java/flight/flight-sql/pom.xml | 1 + java/format/pom.xml | 4 ++ java/gandiva/pom.xml | 1 + .../module-info-compiler-maven-plugin/pom.xml | 1 + java/memory/memory-core/pom.xml | 4 ++ java/memory/memory-netty/pom.xml | 4 ++ java/memory/memory-unsafe/pom.xml | 3 ++ java/performance/pom.xml | 1 + java/pom.xml | 10 +++- java/tools/pom.xml | 6 ++- java/vector/pom.xml | 5 +- 27 files changed, 144 insertions(+), 55 deletions(-) diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing index 4cb3cff24c965..d69d979223e88 160000 --- a/cpp/submodules/parquet-testing +++ b/cpp/submodules/parquet-testing @@ -1 +1 @@ -Subproject commit 4cb3cff24c965fb329cdae763eabce47395a68a0 +Subproject commit d69d979223e883faef9dc6fe3cf573087243c28a diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 8fb23f45f0f3a..f4bf01f1f6f9b 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -264,18 +264,17 @@ def test_version_pre_tag end Dir.glob("java/**/pom.xml") do |path| - version = "#{@snapshot_version}" - lines = File.readlines(path, chomp: true) - target_lines = lines.grep(/#{Regexp.escape(version)}/) - hunks = [] - target_lines.each do |line| - new_line = line.gsub(@snapshot_version) do - @release_version + hunks = generate_hunks(File.readlines(path, chomp: true)) do |line| + if line.include?("#{@snapshot_version}") + new_line = line.gsub(@snapshot_version) do + @release_version + end + [line, new_line] + elsif line.include?("") + [line, normalize_pom_xml_output_timestamp(line)] + else + [nil, nil] end - hunks << [ - "-#{line}", - "+#{new_line}", - ] end expected_changes << {hunks: hunks, path: path} end diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 4b6933d6102a9..08a900f71c7d0 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -244,37 +244,19 @@ def test_version_post_tag end import_path = "github.com/apache/arrow/go/v#{@snapshot_major_version}" - hunks = [] if release_type == :major - lines = File.readlines(path, chomp: true) - target_lines = lines.each_with_index.select do |line, i| - line.include?(import_path) - end - next if target_lines.empty? - n_context_lines = 3 # The default of Git's diff.context - target_hunks = [[target_lines.first[0]]] - previous_i = target_lines.first[1] - target_lines[1..-1].each do |line, i| - if i - previous_i < n_context_lines - target_hunks.last << line - else - target_hunks << [line] - end - previous_i = i - end - target_hunks.each do |lines| - hunk = [] - lines.each do |line,| - hunk << "-#{line}" - end - lines.each do |line| + hunks = generate_hunks(File.readlines(path, chomp: true)) do |line| + if line.include?(import_path) new_line = line.gsub("v#{@snapshot_major_version}") do "v#{@next_major_version}" end - hunk << "+#{new_line}" + [line, new_line] + else + [nil, nil] end - hunks << hunk end + else + hunks = [] end if path == "go/parquet/writer_properties.go" hunks << [ @@ -287,18 +269,17 @@ def test_version_post_tag end Dir.glob("java/**/pom.xml") do |path| - version = "#{@snapshot_version}" - lines = File.readlines(path, chomp: true) - target_lines = lines.grep(/#{Regexp.escape(version)}/) - hunks = [] - target_lines.each do |line| - new_line = line.gsub(@snapshot_version) do - @next_snapshot_version + hunks = generate_hunks(File.readlines(path, chomp: true)) do |line| + if line.include?("#{@snapshot_version}") + new_line = line.gsub(@snapshot_version) do + @next_snapshot_version + end + [line, new_line] + elsif line.include?("") + [line, normalize_pom_xml_output_timestamp(line)] + else + [nil, nil] end - hunks << [ - "-#{line}", - "+#{new_line}", - ] end expected_changes << {hunks: hunks, path: path} end diff --git a/dev/release/test-helper.rb b/dev/release/test-helper.rb index 3b2c3aa6e5874..1b840c75a481f 100644 --- a/dev/release/test-helper.rb +++ b/dev/release/test-helper.rb @@ -83,15 +83,60 @@ def parse_patch(patch) when /\A@@/ in_hunk = true diffs.last[:hunks] << [] - when /\A[-+]/ + when /\A-/ next unless in_hunk diffs.last[:hunks].last << line.chomp + when /\A\+/ + next unless in_hunk + diffs.last[:hunks].last << normalize_added_line(line.chomp) end end diffs.sort_by do |diff| diff[:path] end end + + def generate_hunks(lines) + git_diff_context = 3 # The default of Git's diff.context + max_lines_for_same_hunk = git_diff_context * 2 + 1 + previous_i = nil + grouped_change_blocks = [] + lines.each_with_index do |line, i| + deleted, added = yield(line) + next if deleted.nil? and added.nil? + if previous_i.nil? or (i - previous_i) > max_lines_for_same_hunk + grouped_change_blocks << [] + end + if i - 1 != previous_i + grouped_change_blocks.last << [] + end + grouped_change_blocks.last.last << [deleted, added] + previous_i = i + end + grouped_change_blocks.collect do |change_blocks| + hunk = [] + change_blocks.each do |continuous_changes| + continuous_changes.each do |deleted, _| + hunk << "-#{deleted}" if deleted + end + continuous_changes.each do |_, added| + hunk << "+#{added}" if added + end + end + hunk + end + end + + def normalize_pom_xml_output_timestamp(line) + line.gsub(/.+?2023-12-13T00:00:00Z<" + end + end + + def normalize_added_line(line) + normalize_pom_xml_output_timestamp(line) + end + end module VersionDetectable diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index c0410ea4c2314..185b997eb2b54 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,6 +25,10 @@ (Contrib/Experimental) A library for converting Avro data to Arrow data. http://maven.apache.org + + 2023-12-13T00:00:00Z + + diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index f95956d1f61d5..12e953e7af6d2 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -25,6 +25,10 @@ (Contrib/Experimental)A library for converting JDBC data to Arrow data. http://maven.apache.org + + 2023-12-13T00:00:00Z + + diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 3e32d955ec417..0583b444fb25b 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -20,6 +20,10 @@ Arrow Algorithms (Experimental/Contrib) A collection of algorithms for working with ValueVectors. + + 2023-12-13T00:00:00Z + + org.apache.arrow diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 5c2ed33dadddf..94557abbbb25b 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -26,6 +26,7 @@ Arrow Bill of Materials + 3.5.0 diff --git a/java/c/pom.xml b/java/c/pom.xml index 8fc3f36994d8a..3a1730bb3f246 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -22,6 +22,7 @@ Java implementation of C Data Interface jar + 2023-12-13T00:00:00Z ./build diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 9a9f029fee137..71a3414976bd9 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -20,6 +20,10 @@ Arrow Compression (Experimental/Contrib) A library for working with the compression/decompression of Arrow data. + + 2023-12-13T00:00:00Z + + org.apache.arrow diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index bb5636b745490..11ecce0196bb6 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -24,6 +24,7 @@ Java implementation of Arrow Dataset API/Framework jar + 2023-12-13T00:00:00Z ../../../cpp/release-build/ 2.5.0 1.11.0 diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 8f41d2b65b7d1..f8fd9398e69b8 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -24,6 +24,7 @@ jar + 2023-12-13T00:00:00Z 1 @@ -287,7 +288,7 @@ maven-assembly-plugin - 3.0.0 + 3.2.0 jar-with-dependencies diff --git a/java/flight/flight-grpc/pom.xml b/java/flight/flight-grpc/pom.xml index af765f8c436be..04f5d7aa6ab78 100644 --- a/java/flight/flight-grpc/pom.xml +++ b/java/flight/flight-grpc/pom.xml @@ -24,6 +24,7 @@ jar + 2023-12-13T00:00:00Z 1 diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index bb4f6a6b18733..db8a116b82289 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -24,6 +24,10 @@ Integration tests for Flight RPC. jar + + 2023-12-13T00:00:00Z + + org.apache.arrow @@ -60,7 +64,7 @@ maven-assembly-plugin - 3.0.0 + 3.2.0 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 1f20912b9974f..465c02f01eba8 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -28,6 +28,7 @@ https://arrow.apache.org + 2023-12-13T00:00:00Z ${project.parent.groupId}:${project.parent.artifactId} ${project.parent.version} ${project.name} diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 653ee5c192756..a470db8b7092d 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -27,6 +27,10 @@ jar https://arrow.apache.org + + 2023-12-13T00:00:00Z + + diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 3c7e4b3495e5a..edf0feb2c62cd 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -24,6 +24,7 @@ jar + 2023-12-13T00:00:00Z 1 diff --git a/java/format/pom.xml b/java/format/pom.xml index 3f581311e20ea..636ad97734b05 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -23,6 +23,10 @@ Arrow Format Generated Java files from the IPC Flatbuffer definitions. + + 2023-12-13T00:00:00Z + + com.google.flatbuffers diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index e837a09ff8330..1d544275d1701 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -23,6 +23,7 @@ Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. + 2023-12-13T00:00:00Z 1.8 1.8 3.25.1 diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 70d1993b33c6e..67dac0f9c2114 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -30,6 +30,7 @@ + 2023-12-13T00:00:00Z 3.3.9 diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 6e411c0cd5440..eff375c63f967 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -22,6 +22,10 @@ Arrow Memory - Core Core off-heap memory management libraries for Arrow ValueVectors. + + 2023-12-13T00:00:00Z + + com.google.code.findbugs diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index 159ab5160c983..e03c1064accdf 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -21,6 +21,10 @@ Arrow Memory - Netty Netty allocator and utils for allocating memory in Arrow + + 2023-12-13T00:00:00Z + + org.apache.arrow diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 5ef4e8a9149a5..2749baff9046b 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -21,6 +21,9 @@ Arrow Memory - Unsafe Allocator and utils for allocating memory in Arrow based on sun.misc.Unsafe + + 2023-12-13T00:00:00Z + diff --git a/java/performance/pom.xml b/java/performance/pom.xml index eff3240890beb..f581c97e333e7 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -80,6 +80,7 @@ + 2023-12-13T00:00:00Z UTF-8 1.21 1.8 diff --git a/java/pom.xml b/java/pom.xml index b2513d586268b..369b8719eb61d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -28,6 +28,7 @@ https://arrow.apache.org/ + 3.5.0 ${project.build.directory}/generated-sources 1.9.0 5.10.1 @@ -446,6 +447,13 @@ maven-enforcer-plugin 3.0.0-M2 + + + + [3.3.0,4) + + + org.apache.maven.plugins diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 8df436bac9aef..298c86f7211c6 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -20,6 +20,10 @@ Arrow Tools Java applications for working with Arrow ValueVectors. + + 2023-12-13T00:00:00Z + + org.apache.arrow @@ -81,7 +85,7 @@ maven-assembly-plugin - 3.0.0 + 3.2.0 jar-with-dependencies diff --git a/java/vector/pom.xml b/java/vector/pom.xml index da26fc2982765..e4f1c94f9142d 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -20,8 +20,11 @@ Arrow Vectors An off-heap reference implementation for Arrow columnar data format. - + + 2023-12-13T00:00:00Z + + org.apache.arrow arrow-format