diff --git a/build.gradle b/build.gradle index 11747b70c332..7de2fb290c73 100644 --- a/build.gradle +++ b/build.gradle @@ -81,18 +81,6 @@ ext { minJavaVersion = JavaVersion.VERSION_11 - // Declare script dependency versions outside of palantir's - // version unification control. These are not our main dependencies. - scriptDepVersions = [ - "apache-rat": "0.11", - "commons-codec": "1.13", - "ecj": "3.25.0", - "javacc": "7.0.4", - "jflex": "1.7.0", - "jgit": "5.9.0.202009080501-r", - "flexmark": "0.61.24", - ] - // Allow definiting external tool locations using system props. externalTool = { name -> def resolved = propertyOrDefault("${name}.exe", name as String) @@ -101,6 +89,8 @@ ext { } } +apply from: file('buildSrc/scriptDepVersions.gradle') + // Include smaller chunks configuring dedicated build areas. // Some of these intersect or add additional functionality. // The order of inclusion of these files shouldn't matter (but may @@ -152,6 +142,7 @@ apply from: file('gradle/generation/kuromoji.gradle') apply from: file('gradle/generation/nori.gradle') apply from: file('gradle/generation/icu.gradle') apply from: file('gradle/generation/javacc.gradle') +apply from: file('gradle/generation/unicode-data.gradle') apply from: file('gradle/datasets/external-datasets.gradle') diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index e1d90c412b1b..5d5798142dd0 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -15,26 +15,21 @@ * limitations under the License. */ - -// Make sure the build environment is consistent. -apply from: file('../gradle/validation/check-environment.gradle') - repositories { mavenCentral() } -ext { - // Declare script dependency versions outside of palantir's - // version unification control. These are not our main dependencies. - scriptDepVersions = [ - "commons-codec": "1.13" - ] -} +// Make sure the build environment is consistent. +apply from: file('../gradle/validation/check-environment.gradle') + +// Load common buildSrc and script deps. +apply from: file("scriptDepVersions.gradle") dependencies { implementation gradleApi() implementation localGroovy() implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}" + implementation "com.ibm.icu:icu4j:${scriptDepVersions['icu']}" } diff --git a/buildSrc/scriptDepVersions.gradle b/buildSrc/scriptDepVersions.gradle new file mode 100644 index 000000000000..c768243b960d --- /dev/null +++ b/buildSrc/scriptDepVersions.gradle @@ -0,0 +1,16 @@ +// Declare script dependency versions outside of palantir's +// version unification control. These are not our main dependencies +// but are reused in buildSrc and across applied scripts. + +ext { + scriptDepVersions = [ + "apache-rat": "0.11", + "commons-codec": "1.13", + "ecj": "3.25.0", + "flexmark": "0.61.24", + "icu": "68.2", + "javacc": "7.0.4", + "jflex": "1.7.0", + "jgit": "5.9.0.202009080501-r", + ] +} diff --git a/lucene/analysis/common/src/tools/groovy/generate-unicode-data.groovy b/gradle/generation/unicode-data.gradle similarity index 65% rename from lucene/analysis/common/src/tools/groovy/generate-unicode-data.groovy rename to gradle/generation/unicode-data.gradle index 37857e09b00f..4c18c1ee3786 100644 --- a/lucene/analysis/common/src/tools/groovy/generate-unicode-data.groovy +++ b/gradle/generation/unicode-data.gradle @@ -1,3 +1,6 @@ +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.util.VersionInfo; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -15,34 +18,36 @@ * limitations under the License. */ -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.util.VersionInfo; +// Regenerates UnicodeProps.java +configure(project(":lucene:analysis:common")) { + task generateUnicodeProps() { + def outputFile = file("src/java/org/apache/lucene/analysis/util/UnicodeProps.java") -def linesep = properties['line.separator']; + def icuVersion = VersionInfo.ICU_VERSION.toString() + def unicodeVersion = UCharacter.getUnicodeVersion().toString() -def appendChar = { StringBuilder sb, int c -> - int len = sb.length(); - if (len != 0) { - sb.append(', '); - } - if (len == 0 || len - sb.lastIndexOf(linesep) > 100) { - sb.append(linesep).append(' '); - } - sb.append(String.format(Locale.ROOT, "0x%04X", c)); -} + inputs.property("icu-version", icuVersion) + inputs.property("unicode-version", unicodeVersion) + outputs.file outputFile -def whitespace = new StringBuilder(); -for (int c = UCharacter.MIN_CODE_POINT; c <= UCharacter.MAX_CODE_POINT; c++) { - if (UCharacter.isUWhiteSpace(c)) { - appendChar(whitespace, c); - } -} + doFirst { + def icuLockDepVersion = getVersion("com.ibm.icu", "icu4j") + def icuScriptDep = scriptDepVersions['icu'] + if (icuLockDepVersion != icuScriptDep) { + throw new GradleException("ICU version in build script dependency ${icuScriptDep} and in" + + " project dependency ${icuLockDepVersion} must match.") + } -def icuVersion = VersionInfo.ICU_VERSION.toString(); -def unicodeVersion = UCharacter.getUnicodeVersion().toString(); + List chars = [] + for (int c = UCharacter.MIN_CODE_POINT; c <= UCharacter.MAX_CODE_POINT; c++) { + if (UCharacter.isUWhiteSpace(c)) { + chars.add(String.format(Locale.ROOT, "0x%04X", c)) + } + } + def whitespace = chars.join(", ") -def code = """ -// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate. + def code = """ +// DO NOT EDIT THIS FILE! Use "gradlew generateUnicodeProps tidy" to recreate. /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -68,9 +73,7 @@ import org.apache.lucene.util.SparseFixedBitSet; /** * This file contains unicode properties used by various {@link CharTokenizer}s. - * The data was created using ICU4J v${icuVersion} - *

- * Unicode version: ${unicodeVersion} + * The data was generated using ICU4J v${icuVersion}, unicode version: ${unicodeVersion}. */ public final class UnicodeProps { private UnicodeProps() {} @@ -80,7 +83,7 @@ public final class UnicodeProps { /** Bitset with Unicode WHITESPACE code points. */ public static final Bits WHITESPACE = createBits(${whitespace}); - + private static Bits createBits(final int... codepoints) { final int len = codepoints[codepoints.length - 1] + 1; final SparseFixedBitSet bitset = new SparseFixedBitSet(len); @@ -98,9 +101,10 @@ public final class UnicodeProps { }; } } -"""; - -File f = new File(properties['unicode-props-file']); -f.write(code.trim(), 'UTF-8'); +""" + outputFile.setText(code.trim(), "UTF-8") + } + } -task.log("Unicode data written to: " + f); + regenerate.dependsOn wrapWithPersistentChecksums(generateUnicodeProps, [ andThenTasks: "spotlessApply" ]) +} \ No newline at end of file diff --git a/lucene/analysis/common/src/generated/checksums/generateUnicodeProps.json b/lucene/analysis/common/src/generated/checksums/generateUnicodeProps.json new file mode 100644 index 000000000000..b72e91592b80 --- /dev/null +++ b/lucene/analysis/common/src/generated/checksums/generateUnicodeProps.json @@ -0,0 +1,3 @@ +{ + "lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4" +} \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java index 777100b7438e..55c3f2f28726 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java @@ -1,4 +1,4 @@ -// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate. +// DO NOT EDIT THIS FILE! Use "gradlew generateUnicodeProps tidy" to recreate. /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,15 +24,13 @@ /** * This file contains unicode properties used by various {@link CharTokenizer}s. The data was - * created using ICU4J v62.2.0.0 - * - *

Unicode version: 11.0.0.0 + * generated using ICU4J v68.2.0.0, unicode version: 13.0.0.0. */ public final class UnicodeProps { private UnicodeProps() {} /** Unicode version that was used to generate this file: {@value} */ - public static final String UNICODE_VERSION = "11.0.0.0"; + public static final String UNICODE_VERSION = "13.0.0.0"; /** Bitset with Unicode WHITESPACE code points. */ public static final Bits WHITESPACE =