codenameone · shai-almog · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/CodenameOne/src/com/codename1/util/regex/RECharacter.java b/CodenameOne/src/com/codename1/util/regex/RECharacter.java
@@ -192,12 +192,30 @@ public static byte getType(char c) {
 //#                 return CHAR_CLASSES[i][c - spaceIndex];
 //#             }
 //#         }
+//#         return UNASSIGNED;
 //#else
         if (c < 128) {
             return CHAR_CLASSES[c];
         }
-//#endif
+        // The framework is compiled against the CLDC11 stub, which does not
+        // expose Character.getType or isLetter. Compose what we need from the
+        // available primitives. Letters for which neither isLowerCase nor
+        // isUpperCase holds (modifier letters, OTHER_LETTER such as CJK
+        // ideographs) still fall through to UNASSIGNED.
+        if (Character.isLowerCase(c)) {
+            return LOWERCASE_LETTER;
+        }
+        if (Character.isUpperCase(c)) {
+            return UPPERCASE_LETTER;
+        }
+        if (Character.isDigit(c)) {
+            return DECIMAL_DIGIT_NUMBER;
+        }
+        if (Character.isSpaceChar(c)) {
+            return SPACE_SEPARATOR;
+        }
         return UNASSIGNED;
+//#endif
     }
 //#endif
 

diff --git a/maven/core-unittests/src/test/java/com/codename1/util/regex/RETest.java b/maven/core-unittests/src/test/java/com/codename1/util/regex/RETest.java
@@ -70,4 +70,94 @@ void testPosixClassesAndEscapes() throws Exception {
         assertFalse(wordThenDigits.match("item-42"));
     }
 
+    // Non-Latin coverage. Source files must remain ASCII-only (CI javac uses
+    // the platform default encoding), so non-ASCII test data is written with
+    // Java's backslash-u escape syntax inside string literals.
+    //
+    //   U+00E7 c-cedilla (lower)        U+00C7 C-cedilla (upper)
+    //   U+03B1 Greek alpha (lower)      U+03A3 Greek Sigma (upper)
+    //   U+044F Cyrillic ya (lower)      U+042F Cyrillic YA (upper)
+    //   U+00BD vulgar fraction one-half (OTHER_NUMBER, not a decimal digit)
+    //   U+20AC euro sign                (CURRENCY_SYMBOL)
+    //
+    // The framework is compiled against the CLDC11 stub, which exposes only
+    // isLowerCase / isUpperCase / isDigit / isSpaceChar (no isLetter or
+    // getType). That is enough for cased letters in Latin / Greek / Cyrillic
+    // and decimal digits, but uncased letters such as CJK ideographs
+    // (OTHER_LETTER) cannot be classified and remain unmatched here.
+
+    @FormTest
+    void testPosixAlphaMatchesNonLatinLetters() throws Exception {
+        RE alpha = new RE("^[[:alpha:]]+$"); // ^...$ with +: every character must be [[:alpha:]]
+        assertTrue(alpha.match("\u00E7\u00C7"), "Latin with cedilla"); // U+00E7 (lower), U+00C7 (upper)
+        assertTrue(alpha.match("\u03B1\u03A3"), "Greek letters"); // U+03B1 (lower), U+03A3 (upper)
+        assertTrue(alpha.match("\u042F\u044F"), "Cyrillic letters"); // U+042F (upper), U+044F (lower)
+        assertTrue(alpha.match("abc\u00E7\u03B1\u042F"), "mixed scripts"); // ASCII + Latin-1 + Greek + Cyrillic
+
+        assertFalse(alpha.match("\u00E71"), "letter followed by ASCII digit"); // escape is 4 hex digits: U+00E7 then '1'
+        assertFalse(alpha.match("\u00BD"), "vulgar fraction is not alpha"); // U+00BD one-half
+        assertFalse(alpha.match("\u20AC"), "currency symbol is not alpha"); // U+20AC euro sign
+    }
+
+    @FormTest
+    void testPosixAlnumMatchesNonLatinLettersAndDigits() throws Exception {
+        RE alnum = new RE("^[[:alnum:]]+$"); // ^...$ with +: every character must be [[:alnum:]]
+        assertTrue(alnum.match("\u00E7123"), "c-cedilla followed by digits"); // U+00E7 then "123"
+        assertTrue(alnum.match("\u03B1\u03B2\u03B3"), "Greek run"); // lowercase alpha, beta, gamma
+        assertTrue(alnum.match("abc\u042F9"), "ASCII + Cyrillic + digit"); // "abc", U+042F, '9'
+
+        assertFalse(alnum.match("\u00E7-123"), "hyphen breaks alnum"); // '-' is neither letter nor digit
+        assertFalse(alnum.match("\u00BD"), "fraction is not alnum (not a decimal digit)"); // U+00BD one-half
+        assertFalse(alnum.match("\u20AC"), "currency symbol is not alnum"); // U+20AC euro sign
+    }
+
+    @FormTest
+    void testPosixLowerUpperOnNonLatinLetters() throws Exception {
+        RE lower = new RE("^[[:lower:]]+$"); // ^...$ with +: every character must be [[:lower:]]
+        assertTrue(lower.match("\u00E7"), "c-cedilla is lower"); // U+00E7
+        assertTrue(lower.match("\u03B1"), "Greek alpha is lower"); // U+03B1
+        assertTrue(lower.match("\u044F"), "Cyrillic ya is lower"); // U+044F
+        assertFalse(lower.match("\u00C7"), "C-cedilla is not lower"); // U+00C7, uppercase counterpart
+        assertFalse(lower.match("\u042F"), "Cyrillic YA is not lower"); // U+042F, uppercase counterpart
+
+        RE upper = new RE("^[[:upper:]]+$"); // same checks mirrored for [[:upper:]]
+        assertTrue(upper.match("\u00C7"), "C-cedilla is upper"); // U+00C7
+        assertTrue(upper.match("\u03A3"), "Greek Sigma is upper"); // U+03A3
+        assertTrue(upper.match("\u042F"), "Cyrillic YA is upper"); // U+042F
+        assertFalse(upper.match("\u00E7"), "c-cedilla is not upper"); // U+00E7, lowercase counterpart
+    }
+
+    @FormTest
+    void testReportedAlphaAlnumCaptureBug() throws Exception {
+        // Regression: "test:\\s*([[:alpha:]][[:alnum:]]*)" used to silently fail
+        // to match identifiers that begin with a non-ASCII letter, because
+        // RECharacter.getType() returned UNASSIGNED for any char >= 128.
+        RE expression = new RE("test:\\s*([[:alpha:]][[:alnum:]]*)"); // unanchored; group 1 wraps the identifier
+
+        assertTrue(expression.match("test: \u00E7123"),
+                "alpha+alnum should match identifier starting with c-cedilla");
+        assertEquals("\u00E7123", expression.getParen(1)); // expects U+00E7 followed by "123"
+
+        assertTrue(expression.match("test: \u03B1\u03B2\u03B30"),
+                "alpha+alnum should match a Greek identifier");
+        assertEquals("\u03B1\u03B2\u03B30", expression.getParen(1)); // expects alpha, beta, gamma, then '0'
+
+        assertTrue(expression.match("test: \u042F\u044F1"),
+                "alpha+alnum should match a Cyrillic identifier");
+        assertEquals("\u042F\u044F1", expression.getParen(1)); // expects U+042F, U+044F, then '1'
+
+        // A leading ASCII digit is still rejected (must start with [[:alpha:]]).
+        assertFalse(expression.match("test: 9abc"));
+    }
+
+    @FormTest
+    void testPosixDigitIsAsciiOnlyForOtherNumbers() throws Exception {
+        // [[:digit:]] should match decimal digits only: vulgar fractions / superscripts
+        // (OTHER_NUMBER) and currency / other symbols must not match.
+        RE digit = new RE("^[[:digit:]]+$"); // NOTE(review): only negative cases are asserted in this test
+        assertFalse(digit.match("\u00BD"), "one-half is not a decimal digit"); // U+00BD
+        assertFalse(digit.match("\u20AC"), "euro sign is not a digit"); // U+20AC
+        assertFalse(digit.match("\u00E7"), "letter is not a digit"); // U+00E7
+    }
+
 }