Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,6 @@ ext {

minJavaVersion = JavaVersion.VERSION_11

// Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies.
scriptDepVersions = [
"apache-rat": "0.11",
"commons-codec": "1.13",
"ecj": "3.25.0",
"javacc": "7.0.4",
"jflex": "1.7.0",
"jgit": "5.9.0.202009080501-r",
"flexmark": "0.61.24",
]

// Allow defining external tool locations using system props.
externalTool = { name ->
def resolved = propertyOrDefault("${name}.exe", name as String)
Expand All @@ -101,6 +89,8 @@ ext {
}
}

apply from: file('buildSrc/scriptDepVersions.gradle')

// Include smaller chunks configuring dedicated build areas.
// Some of these intersect or add additional functionality.
// The order of inclusion of these files shouldn't matter (but may
Expand Down Expand Up @@ -152,6 +142,7 @@ apply from: file('gradle/generation/kuromoji.gradle')
apply from: file('gradle/generation/nori.gradle')
apply from: file('gradle/generation/icu.gradle')
apply from: file('gradle/generation/javacc.gradle')
apply from: file('gradle/generation/unicode-data.gradle')

apply from: file('gradle/datasets/external-datasets.gradle')

Expand Down
17 changes: 6 additions & 11 deletions buildSrc/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,21 @@
* limitations under the License.
*/


// Make sure the build environment is consistent.
apply from: file('../gradle/validation/check-environment.gradle')

repositories {
mavenCentral()
}

ext {
// Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies.
scriptDepVersions = [
"commons-codec": "1.13"
]
}
// Make sure the build environment is consistent.
apply from: file('../gradle/validation/check-environment.gradle')

// Load common buildSrc and script deps.
apply from: file("scriptDepVersions.gradle")

dependencies {
implementation gradleApi()
implementation localGroovy()

implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
implementation "com.ibm.icu:icu4j:${scriptDepVersions['icu']}"
}

16 changes: 16 additions & 0 deletions buildSrc/scriptDepVersions.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies
// but are reused in buildSrc and across applied scripts.

// Single shared map of artifact key -> version string, published as an
// extra property so that any script applying this file can look versions
// up as scriptDepVersions['<key>'].
ext {
scriptDepVersions = [
"apache-rat": "0.11",
// Used for the buildSrc commons-codec implementation dependency.
"commons-codec": "1.13",
"ecj": "3.25.0",
"flexmark": "0.61.24",
// Used for the buildSrc icu4j implementation dependency. The
// generateUnicodeProps task compares this value with the project's
// com.ibm.icu:icu4j dependency version and fails if they differ,
// so the two must be updated together.
"icu": "68.2",
"javacc": "7.0.4",
"jflex": "1.7.0",
"jgit": "5.9.0.202009080501-r",
]
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.VersionInfo;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
Expand All @@ -15,34 +18,36 @@
* limitations under the License.
*/

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.VersionInfo;
// Regenerates UnicodeProps.java
configure(project(":lucene:analysis:common")) {
task generateUnicodeProps() {
def outputFile = file("src/java/org/apache/lucene/analysis/util/UnicodeProps.java")

def linesep = properties['line.separator'];
def icuVersion = VersionInfo.ICU_VERSION.toString()
def unicodeVersion = UCharacter.getUnicodeVersion().toString()

def appendChar = { StringBuilder sb, int c ->
int len = sb.length();
if (len != 0) {
sb.append(', ');
}
if (len == 0 || len - sb.lastIndexOf(linesep) > 100) {
sb.append(linesep).append(' ');
}
sb.append(String.format(Locale.ROOT, "0x%04X", c));
}
inputs.property("icu-version", icuVersion)
inputs.property("unicode-version", unicodeVersion)
outputs.file outputFile

def whitespace = new StringBuilder();
for (int c = UCharacter.MIN_CODE_POINT; c <= UCharacter.MAX_CODE_POINT; c++) {
if (UCharacter.isUWhiteSpace(c)) {
appendChar(whitespace, c);
}
}
doFirst {
def icuLockDepVersion = getVersion("com.ibm.icu", "icu4j")
def icuScriptDep = scriptDepVersions['icu']
if (icuLockDepVersion != icuScriptDep) {
throw new GradleException("ICU version in build script dependency ${icuScriptDep} and in" +
" project dependency ${icuLockDepVersion} must match.")
}

def icuVersion = VersionInfo.ICU_VERSION.toString();
def unicodeVersion = UCharacter.getUnicodeVersion().toString();
List<String> chars = []
for (int c = UCharacter.MIN_CODE_POINT; c <= UCharacter.MAX_CODE_POINT; c++) {
if (UCharacter.isUWhiteSpace(c)) {
chars.add(String.format(Locale.ROOT, "0x%04X", c))
}
}
def whitespace = chars.join(", ")

def code = """
// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate.
def code = """
// DO NOT EDIT THIS FILE! Use "gradlew generateUnicodeProps tidy" to recreate.

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -68,9 +73,7 @@ import org.apache.lucene.util.SparseFixedBitSet;

/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
* The data was created using ICU4J v${icuVersion}
* <p>
* Unicode version: ${unicodeVersion}
* The data was generated using ICU4J v${icuVersion}, unicode version: ${unicodeVersion}.
*/
public final class UnicodeProps {
private UnicodeProps() {}
Expand All @@ -80,7 +83,7 @@ public final class UnicodeProps {

/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(${whitespace});

private static Bits createBits(final int... codepoints) {
final int len = codepoints[codepoints.length - 1] + 1;
final SparseFixedBitSet bitset = new SparseFixedBitSet(len);
Expand All @@ -98,9 +101,10 @@ public final class UnicodeProps {
};
}
}
""";

File f = new File(properties['unicode-props-file']);
f.write(code.trim(), 'UTF-8');
"""
outputFile.setText(code.trim(), "UTF-8")
}
}

task.log("Unicode data written to: " + f);
regenerate.dependsOn wrapWithPersistentChecksums(generateUnicodeProps, [ andThenTasks: "spotlessApply" ])
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// DO NOT EDIT THIS FILE! Use "ant unicode-data" to recreate.
// DO NOT EDIT THIS FILE! Use "gradlew generateUnicodeProps tidy" to recreate.

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -24,15 +24,13 @@

/**
* This file contains unicode properties used by various {@link CharTokenizer}s. The data was
* created using ICU4J v62.2.0.0
*
* <p>Unicode version: 11.0.0.0
* generated using ICU4J v68.2.0.0, unicode version: 13.0.0.0.
*/
public final class UnicodeProps {
private UnicodeProps() {}

/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "11.0.0.0";
public static final String UNICODE_VERSION = "13.0.0.0";

/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE =
Expand Down