Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace CASE_RANGES with Character.to{Lower,Upper}Case(). #78

Merged
merged 1 commit into from
Oct 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ sourceSets {
main {
java {
srcDir 'java'
exclude 'com/google/re2j/super/**'
}
}
test {
Expand Down
14 changes: 14 additions & 0 deletions java/com/google/re2j/Characters.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.google.re2j;

/**
 * Thin indirection over {@link Character} case-mapping methods, kept in its own class so that it
 * can be overridden for GWT.
 */
final class Characters {
  /** Static utility holder; never instantiated. */
  private Characters() {}

  /**
   * Delegates to {@link Character#toLowerCase(int)}.
   *
   * @param codePoint the code point to convert
   * @return whatever {@code Character.toLowerCase(codePoint)} returns
   */
  static int toLowerCase(int codePoint) {
    final int lowered = Character.toLowerCase(codePoint);
    return lowered;
  }

  /**
   * Delegates to {@link Character#toUpperCase(int)}.
   *
   * @param codePoint the code point to convert
   * @return whatever {@code Character.toUpperCase(codePoint)} returns
   */
  static int toUpperCase(int codePoint) {
    final int raised = Character.toUpperCase(codePoint);
    return raised;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
<inherits name="com.google.gwt.user.User" />
<entry-point class="com.google.re2j.FakeGWTEntryPoint" />
<source path=""/>
<super-source path="super"/>
</module>
80 changes: 2 additions & 78 deletions java/com/google/re2j/Unicode.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ class Unicode {
// The highest legal Latin-1 value.
static final int MAX_LATIN1 = 0xFF;

private static final int MAX_CASE = 3;

// Represents invalid code points.
private static final int REPLACEMENT_CHAR = 0xFFFD;

// Minimum and maximum runes involved in folding.
// Checked during test.
static final int MIN_FOLD = 0x0041;
Expand Down Expand Up @@ -109,77 +104,6 @@ static boolean isPrint(int r) {
|| is(UnicodeTables.S, r);
}

// A case range is conceptually a record:
// class CaseRange {
// int lo, hi;
// int upper, lower, title;
// }
// but flattened as an int[5].

// to applies the case mapping selected by kase to rune r, looking r up in
// caseRange.  Each row of caseRange is {lo, hi, upper, lower, title}, and
// kase selects which of the last three columns supplies the delta.
// Returns REPLACEMENT_CHAR when kase is outside [0, MAX_CASE), and r itself
// when no row covers r.
private static int to(int kase, int r, int[][] caseRange) {
  boolean validCase = 0 <= kase && kase < MAX_CASE;
  if (!validCase) {
    return REPLACEMENT_CHAR; // as reasonable an error as any
  }

  // Binary search for the row whose [first, last] interval contains r.
  int low = 0;
  int high = caseRange.length;
  while (low < high) {
    int mid = low + (high - low) / 2;
    int[] row = caseRange[mid];
    int first = row[0];
    int last = row[1];

    if (r < first) {
      high = mid;
      continue;
    }
    if (r > last) {
      low = mid + 1;
      continue;
    }

    int delta = row[2 + kase];
    if (delta <= MAX_RUNE) {
      return r + delta;
    }
    // A delta above MAX_RUNE marks an Upper-Lower sequence.  Such a
    // sequence always begins with an upper-case letter, so the real
    // deltas look like:
    //   {0, 1, 0}    upper case (its lower-case form comes next)
    //   {-1, 0, -1}  lower case (upper and title forms come before)
    // Even offsets from the start of the sequence are upper case and odd
    // offsets are lower case, so the mapping is done by clearing or
    // setting the low bit of the offset.  UpperCase and TitleCase are even
    // constants while LowerCase is odd, hence the low bit of kase.
    int offset = r - first;
    return first + ((offset & ~1) | (kase & 1));
  }
  return r;
}

// to converts rune r to the case named by kase (UpperCase, LowerCase, or
// TitleCase) by consulting the UnicodeTables.CASE_RANGES table.
private static int to(int kase, int r) {
  final int[][] ranges = UnicodeTables.CASE_RANGES;
  return to(kase, r, ranges);
}

// toUpper returns the upper-case form of rune r.  Runes up to MAX_ASCII are
// handled arithmetically; everything else goes through the case tables.
static int toUpper(int r) {
  if (r > MAX_ASCII) {
    return to(UnicodeTables.UpperCase, r);
  }
  boolean asciiLower = r >= 'a' && r <= 'z';
  return asciiLower ? r - ('a' - 'A') : r;
}

// toLower returns the lower-case form of rune r.  Runes up to MAX_ASCII are
// handled arithmetically; everything else goes through the case tables.
static int toLower(int r) {
  if (r > MAX_ASCII) {
    return to(UnicodeTables.LowerCase, r);
  }
  boolean asciiUpper = r >= 'A' && r <= 'Z';
  return asciiUpper ? r + ('a' - 'A') : r;
}

// simpleFold iterates over Unicode code points equivalent under
// the Unicode-defined simple case folding. Among the code points
// equivalent to rune (including rune itself), SimpleFold returns the
Expand Down Expand Up @@ -216,11 +140,11 @@ static int simpleFold(int r) {
// No folding specified. This is a one- or two-element
// equivalence class containing rune and toLower(rune)
// and toUpper(rune) if they are different from rune.
int l = toLower(r);
int l = Characters.toLowerCase(r);
if (l != r) {
return l;
}
return toUpper(r);
return Characters.toUpperCase(r);
}

private Unicode() {} // uninstantiable
Expand Down
260 changes: 0 additions & 260 deletions java/com/google/re2j/UnicodeTables.java
Original file line number Diff line number Diff line change
Expand Up @@ -5285,266 +5285,6 @@ private static int[][] make_White_Space() {
static final int[][] Unified_Ideograph = _Unified_Ideograph;
static final int[][] Variation_Selector = _Variation_Selector;
static final int[][] White_Space = _White_Space;
// CASE_RANGES is the case-mapping table.  Each row is a flattened record
//   {lo, hi, upper, lower, title}
// where [lo, hi] is an inclusive range of code points and the last three
// entries are the deltas added to a code point in that range to obtain its
// upper-, lower- and title-case forms.  Rows appear in ascending order of lo.
// A row whose deltas are all UpperLower describes a run of alternating
// upper/lower-case pairs rather than a fixed delta.
// NOTE(review): UpperLower is declared outside this view; presumably it is a
// sentinel larger than any valid rune -- confirm at its declaration.
static final int[][] CASE_RANGES = {
{0x0041, 0x005A, 0, 32, 0},
{0x0061, 0x007A, -32, 0, -32},
{0x00B5, 0x00B5, 743, 0, 743},
{0x00C0, 0x00D6, 0, 32, 0},
{0x00D8, 0x00DE, 0, 32, 0},
{0x00E0, 0x00F6, -32, 0, -32},
{0x00F8, 0x00FE, -32, 0, -32},
{0x00FF, 0x00FF, 121, 0, 121},
{0x0100, 0x012F, UpperLower, UpperLower, UpperLower},
{0x0130, 0x0130, 0, -199, 0},
{0x0131, 0x0131, -232, 0, -232},
{0x0132, 0x0137, UpperLower, UpperLower, UpperLower},
{0x0139, 0x0148, UpperLower, UpperLower, UpperLower},
{0x014A, 0x0177, UpperLower, UpperLower, UpperLower},
{0x0178, 0x0178, 0, -121, 0},
{0x0179, 0x017E, UpperLower, UpperLower, UpperLower},
{0x017F, 0x017F, -300, 0, -300},
{0x0180, 0x0180, 195, 0, 195},
{0x0181, 0x0181, 0, 210, 0},
{0x0182, 0x0185, UpperLower, UpperLower, UpperLower},
{0x0186, 0x0186, 0, 206, 0},
{0x0187, 0x0188, UpperLower, UpperLower, UpperLower},
{0x0189, 0x018A, 0, 205, 0},
{0x018B, 0x018C, UpperLower, UpperLower, UpperLower},
{0x018E, 0x018E, 0, 79, 0},
{0x018F, 0x018F, 0, 202, 0},
{0x0190, 0x0190, 0, 203, 0},
{0x0191, 0x0192, UpperLower, UpperLower, UpperLower},
{0x0193, 0x0193, 0, 205, 0},
{0x0194, 0x0194, 0, 207, 0},
{0x0195, 0x0195, 97, 0, 97},
{0x0196, 0x0196, 0, 211, 0},
{0x0197, 0x0197, 0, 209, 0},
{0x0198, 0x0199, UpperLower, UpperLower, UpperLower},
{0x019A, 0x019A, 163, 0, 163},
{0x019C, 0x019C, 0, 211, 0},
{0x019D, 0x019D, 0, 213, 0},
{0x019E, 0x019E, 130, 0, 130},
{0x019F, 0x019F, 0, 214, 0},
{0x01A0, 0x01A5, UpperLower, UpperLower, UpperLower},
{0x01A6, 0x01A6, 0, 218, 0},
{0x01A7, 0x01A8, UpperLower, UpperLower, UpperLower},
{0x01A9, 0x01A9, 0, 218, 0},
{0x01AC, 0x01AD, UpperLower, UpperLower, UpperLower},
{0x01AE, 0x01AE, 0, 218, 0},
{0x01AF, 0x01B0, UpperLower, UpperLower, UpperLower},
{0x01B1, 0x01B2, 0, 217, 0},
{0x01B3, 0x01B6, UpperLower, UpperLower, UpperLower},
{0x01B7, 0x01B7, 0, 219, 0},
{0x01B8, 0x01B9, UpperLower, UpperLower, UpperLower},
{0x01BC, 0x01BD, UpperLower, UpperLower, UpperLower},
{0x01BF, 0x01BF, 56, 0, 56},
{0x01C4, 0x01C4, 0, 2, 1},
{0x01C5, 0x01C5, -1, 1, 0},
{0x01C6, 0x01C6, -2, 0, -1},
{0x01C7, 0x01C7, 0, 2, 1},
{0x01C8, 0x01C8, -1, 1, 0},
{0x01C9, 0x01C9, -2, 0, -1},
{0x01CA, 0x01CA, 0, 2, 1},
{0x01CB, 0x01CB, -1, 1, 0},
{0x01CC, 0x01CC, -2, 0, -1},
{0x01CD, 0x01DC, UpperLower, UpperLower, UpperLower},
{0x01DD, 0x01DD, -79, 0, -79},
{0x01DE, 0x01EF, UpperLower, UpperLower, UpperLower},
{0x01F1, 0x01F1, 0, 2, 1},
{0x01F2, 0x01F2, -1, 1, 0},
{0x01F3, 0x01F3, -2, 0, -1},
{0x01F4, 0x01F5, UpperLower, UpperLower, UpperLower},
{0x01F6, 0x01F6, 0, -97, 0},
{0x01F7, 0x01F7, 0, -56, 0},
{0x01F8, 0x021F, UpperLower, UpperLower, UpperLower},
{0x0220, 0x0220, 0, -130, 0},
{0x0222, 0x0233, UpperLower, UpperLower, UpperLower},
{0x023A, 0x023A, 0, 10795, 0},
{0x023B, 0x023C, UpperLower, UpperLower, UpperLower},
{0x023D, 0x023D, 0, -163, 0},
{0x023E, 0x023E, 0, 10792, 0},
{0x023F, 0x0240, 10815, 0, 10815},
{0x0241, 0x0242, UpperLower, UpperLower, UpperLower},
{0x0243, 0x0243, 0, -195, 0},
{0x0244, 0x0244, 0, 69, 0},
{0x0245, 0x0245, 0, 71, 0},
{0x0246, 0x024F, UpperLower, UpperLower, UpperLower},
{0x0250, 0x0250, 10783, 0, 10783},
{0x0251, 0x0251, 10780, 0, 10780},
{0x0252, 0x0252, 10782, 0, 10782},
{0x0253, 0x0253, -210, 0, -210},
{0x0254, 0x0254, -206, 0, -206},
{0x0256, 0x0257, -205, 0, -205},
{0x0259, 0x0259, -202, 0, -202},
{0x025B, 0x025B, -203, 0, -203},
{0x0260, 0x0260, -205, 0, -205},
{0x0263, 0x0263, -207, 0, -207},
{0x0265, 0x0265, 42280, 0, 42280},
{0x0268, 0x0268, -209, 0, -209},
{0x0269, 0x0269, -211, 0, -211},
{0x026B, 0x026B, 10743, 0, 10743},
{0x026F, 0x026F, -211, 0, -211},
{0x0271, 0x0271, 10749, 0, 10749},
{0x0272, 0x0272, -213, 0, -213},
{0x0275, 0x0275, -214, 0, -214},
{0x027D, 0x027D, 10727, 0, 10727},
{0x0280, 0x0280, -218, 0, -218},
{0x0283, 0x0283, -218, 0, -218},
{0x0288, 0x0288, -218, 0, -218},
{0x0289, 0x0289, -69, 0, -69},
{0x028A, 0x028B, -217, 0, -217},
{0x028C, 0x028C, -71, 0, -71},
{0x0292, 0x0292, -219, 0, -219},
{0x0345, 0x0345, 84, 0, 84},
{0x0370, 0x0373, UpperLower, UpperLower, UpperLower},
{0x0376, 0x0377, UpperLower, UpperLower, UpperLower},
{0x037B, 0x037D, 130, 0, 130},
{0x0386, 0x0386, 0, 38, 0},
{0x0388, 0x038A, 0, 37, 0},
{0x038C, 0x038C, 0, 64, 0},
{0x038E, 0x038F, 0, 63, 0},
{0x0391, 0x03A1, 0, 32, 0},
{0x03A3, 0x03AB, 0, 32, 0},
{0x03AC, 0x03AC, -38, 0, -38},
{0x03AD, 0x03AF, -37, 0, -37},
{0x03B1, 0x03C1, -32, 0, -32},
{0x03C2, 0x03C2, -31, 0, -31},
{0x03C3, 0x03CB, -32, 0, -32},
{0x03CC, 0x03CC, -64, 0, -64},
{0x03CD, 0x03CE, -63, 0, -63},
{0x03CF, 0x03CF, 0, 8, 0},
{0x03D0, 0x03D0, -62, 0, -62},
{0x03D1, 0x03D1, -57, 0, -57},
{0x03D5, 0x03D5, -47, 0, -47},
{0x03D6, 0x03D6, -54, 0, -54},
{0x03D7, 0x03D7, -8, 0, -8},
{0x03D8, 0x03EF, UpperLower, UpperLower, UpperLower},
{0x03F0, 0x03F0, -86, 0, -86},
{0x03F1, 0x03F1, -80, 0, -80},
{0x03F2, 0x03F2, 7, 0, 7},
{0x03F4, 0x03F4, 0, -60, 0},
{0x03F5, 0x03F5, -96, 0, -96},
{0x03F7, 0x03F8, UpperLower, UpperLower, UpperLower},
{0x03F9, 0x03F9, 0, -7, 0},
{0x03FA, 0x03FB, UpperLower, UpperLower, UpperLower},
{0x03FD, 0x03FF, 0, -130, 0},
{0x0400, 0x040F, 0, 80, 0},
{0x0410, 0x042F, 0, 32, 0},
{0x0430, 0x044F, -32, 0, -32},
{0x0450, 0x045F, -80, 0, -80},
{0x0460, 0x0481, UpperLower, UpperLower, UpperLower},
{0x048A, 0x04BF, UpperLower, UpperLower, UpperLower},
{0x04C0, 0x04C0, 0, 15, 0},
{0x04C1, 0x04CE, UpperLower, UpperLower, UpperLower},
{0x04CF, 0x04CF, -15, 0, -15},
{0x04D0, 0x0527, UpperLower, UpperLower, UpperLower},
{0x0531, 0x0556, 0, 48, 0},
{0x0561, 0x0586, -48, 0, -48},
{0x10A0, 0x10C5, 0, 7264, 0},
{0x1D79, 0x1D79, 35332, 0, 35332},
{0x1D7D, 0x1D7D, 3814, 0, 3814},
{0x1E00, 0x1E95, UpperLower, UpperLower, UpperLower},
{0x1E9B, 0x1E9B, -59, 0, -59},
{0x1E9E, 0x1E9E, 0, -7615, 0},
{0x1EA0, 0x1EFF, UpperLower, UpperLower, UpperLower},
{0x1F00, 0x1F07, 8, 0, 8},
{0x1F08, 0x1F0F, 0, -8, 0},
{0x1F10, 0x1F15, 8, 0, 8},
{0x1F18, 0x1F1D, 0, -8, 0},
{0x1F20, 0x1F27, 8, 0, 8},
{0x1F28, 0x1F2F, 0, -8, 0},
{0x1F30, 0x1F37, 8, 0, 8},
{0x1F38, 0x1F3F, 0, -8, 0},
{0x1F40, 0x1F45, 8, 0, 8},
{0x1F48, 0x1F4D, 0, -8, 0},
{0x1F51, 0x1F51, 8, 0, 8},
{0x1F53, 0x1F53, 8, 0, 8},
{0x1F55, 0x1F55, 8, 0, 8},
{0x1F57, 0x1F57, 8, 0, 8},
{0x1F59, 0x1F59, 0, -8, 0},
{0x1F5B, 0x1F5B, 0, -8, 0},
{0x1F5D, 0x1F5D, 0, -8, 0},
{0x1F5F, 0x1F5F, 0, -8, 0},
{0x1F60, 0x1F67, 8, 0, 8},
{0x1F68, 0x1F6F, 0, -8, 0},
{0x1F70, 0x1F71, 74, 0, 74},
{0x1F72, 0x1F75, 86, 0, 86},
{0x1F76, 0x1F77, 100, 0, 100},
{0x1F78, 0x1F79, 128, 0, 128},
{0x1F7A, 0x1F7B, 112, 0, 112},
{0x1F7C, 0x1F7D, 126, 0, 126},
{0x1F80, 0x1F87, 8, 0, 8},
{0x1F88, 0x1F8F, 0, -8, 0},
{0x1F90, 0x1F97, 8, 0, 8},
{0x1F98, 0x1F9F, 0, -8, 0},
{0x1FA0, 0x1FA7, 8, 0, 8},
{0x1FA8, 0x1FAF, 0, -8, 0},
{0x1FB0, 0x1FB1, 8, 0, 8},
{0x1FB3, 0x1FB3, 9, 0, 9},
{0x1FB8, 0x1FB9, 0, -8, 0},
{0x1FBA, 0x1FBB, 0, -74, 0},
{0x1FBC, 0x1FBC, 0, -9, 0},
{0x1FBE, 0x1FBE, -7205, 0, -7205},
{0x1FC3, 0x1FC3, 9, 0, 9},
{0x1FC8, 0x1FCB, 0, -86, 0},
{0x1FCC, 0x1FCC, 0, -9, 0},
{0x1FD0, 0x1FD1, 8, 0, 8},
{0x1FD8, 0x1FD9, 0, -8, 0},
{0x1FDA, 0x1FDB, 0, -100, 0},
{0x1FE0, 0x1FE1, 8, 0, 8},
{0x1FE5, 0x1FE5, 7, 0, 7},
{0x1FE8, 0x1FE9, 0, -8, 0},
{0x1FEA, 0x1FEB, 0, -112, 0},
{0x1FEC, 0x1FEC, 0, -7, 0},
{0x1FF3, 0x1FF3, 9, 0, 9},
{0x1FF8, 0x1FF9, 0, -128, 0},
{0x1FFA, 0x1FFB, 0, -126, 0},
{0x1FFC, 0x1FFC, 0, -9, 0},
{0x2126, 0x2126, 0, -7517, 0},
{0x212A, 0x212A, 0, -8383, 0},
{0x212B, 0x212B, 0, -8262, 0},
{0x2132, 0x2132, 0, 28, 0},
{0x214E, 0x214E, -28, 0, -28},
{0x2160, 0x216F, 0, 16, 0},
{0x2170, 0x217F, -16, 0, -16},
{0x2183, 0x2184, UpperLower, UpperLower, UpperLower},
{0x24B6, 0x24CF, 0, 26, 0},
{0x24D0, 0x24E9, -26, 0, -26},
{0x2C00, 0x2C2E, 0, 48, 0},
{0x2C30, 0x2C5E, -48, 0, -48},
{0x2C60, 0x2C61, UpperLower, UpperLower, UpperLower},
{0x2C62, 0x2C62, 0, -10743, 0},
{0x2C63, 0x2C63, 0, -3814, 0},
{0x2C64, 0x2C64, 0, -10727, 0},
{0x2C65, 0x2C65, -10795, 0, -10795},
{0x2C66, 0x2C66, -10792, 0, -10792},
{0x2C67, 0x2C6C, UpperLower, UpperLower, UpperLower},
{0x2C6D, 0x2C6D, 0, -10780, 0},
{0x2C6E, 0x2C6E, 0, -10749, 0},
{0x2C6F, 0x2C6F, 0, -10783, 0},
{0x2C70, 0x2C70, 0, -10782, 0},
{0x2C72, 0x2C73, UpperLower, UpperLower, UpperLower},
{0x2C75, 0x2C76, UpperLower, UpperLower, UpperLower},
{0x2C7E, 0x2C7F, 0, -10815, 0},
{0x2C80, 0x2CE3, UpperLower, UpperLower, UpperLower},
{0x2CEB, 0x2CEE, UpperLower, UpperLower, UpperLower},
{0x2D00, 0x2D25, -7264, 0, -7264},
{0xA640, 0xA66D, UpperLower, UpperLower, UpperLower},
{0xA680, 0xA697, UpperLower, UpperLower, UpperLower},
{0xA722, 0xA72F, UpperLower, UpperLower, UpperLower},
{0xA732, 0xA76F, UpperLower, UpperLower, UpperLower},
{0xA779, 0xA77C, UpperLower, UpperLower, UpperLower},
{0xA77D, 0xA77D, 0, -35332, 0},
{0xA77E, 0xA787, UpperLower, UpperLower, UpperLower},
{0xA78B, 0xA78C, UpperLower, UpperLower, UpperLower},
{0xA78D, 0xA78D, 0, -42280, 0},
{0xA790, 0xA791, UpperLower, UpperLower, UpperLower},
{0xA7A0, 0xA7A9, UpperLower, UpperLower, UpperLower},
{0xFF21, 0xFF3A, 0, 32, 0},
{0xFF41, 0xFF5A, -32, 0, -32},
{0x10400, 0x10427, 0, 40, 0},
{0x10428, 0x1044F, -40, 0, -40},
};
static final int[][] CASE_ORBIT = {
{0x004B, 0x006B},
{0x0053, 0x0073},
Expand Down
Loading