Skip to content

Commit

Permalink
feat: avoid String.toUpper() and implement Regular Expression based…
Browse files Browse the repository at this point in the history
… `StringUtils.startsWithIgnoringCase()`
  • Loading branch information
manticore-projects committed Feb 1, 2024
1 parent 9e88133 commit 136a77c
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 25 deletions.
49 changes: 24 additions & 25 deletions h2/src/main/org/h2/bnf/context/DbContextRule.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,27 +69,27 @@ public void accept(BnfVisitor visitor) {

@Override
public boolean autoComplete(Sentence sentence) {
String query = sentence.getQuery(), s = query;
String up = sentence.getQueryUpper();
final String query = sentence.getQuery();
String s = query;
switch (type) {
case SCHEMA: {
DbSchema[] schemas = contents.getSchemas();
String best = null;
DbSchema bestSchema = null;
for (DbSchema schema: schemas) {
String name = StringUtils.toUpperEnglish(schema.name);
String quotedName = StringUtils.quoteIdentifier(schema.name);
if (up.startsWith(name)) {
String name = schema.name;
String quotedName = StringUtils.quoteIdentifier(name);
if (StringUtils.startsWithIgnoringCase(query, name)) {
if (best == null || name.length() > best.length()) {
best = name;
bestSchema = schema;
}
} else if (query.startsWith(quotedName)) {
} else if (StringUtils.startsWith(query, quotedName)) {
if (best == null || name.length() > best.length()) {
best = quotedName;
bestSchema = schema;
}
} else if (s.isEmpty() || name.startsWith(up) || quotedName.startsWith(query) ) {
} else if (s.isEmpty() || StringUtils.startsWithIgnoringCase(name, query) || StringUtils.startsWithIgnoringCase(quotedName, query)) {
if (s.length() < name.length()) {
sentence.add(name, name.substring(s.length()), type);
sentence.add(schema.quotedName + ".",
Expand All @@ -113,15 +113,15 @@ public boolean autoComplete(Sentence sentence) {
String best = null;
DbTableOrView bestTable = null;
for (DbTableOrView table : tables) {
String name = StringUtils.toUpperEnglish(table.getName());
String quotedName = StringUtils.quoteIdentifier(StringUtils.toUpperEnglish(table.getName()));
String name = table.getName();
String quotedName = StringUtils.quoteIdentifier(name);

if (up.startsWith(name) || ("\"" + up).startsWith(quotedName)) {
if (StringUtils.startsWithIgnoringCase(query, name) || StringUtils.startsWithIgnoringCase("\"" + query, quotedName)) {
if (best == null || name.length() > best.length()) {
best = name;
bestTable = table;
}
} else if (s.isEmpty() || name.startsWith(up) || quotedName.startsWith(up)) {
} else if (s.isEmpty() || StringUtils.startsWithIgnoringCase(name, query) || StringUtils.startsWithIgnoringCase(quotedName, query)) {
if (s.length() < name.length()) {
sentence.add(table.getQuotedName(),
table.getQuotedName().substring(s.length()),
Expand All @@ -147,16 +147,16 @@ public boolean autoComplete(Sentence sentence) {
if (query.indexOf(' ') < 0) {
break;
}
for (; i < up.length(); i++) {
char ch = up.charAt(i);
for (; i < query.length(); i++) {
char ch = query.charAt(i);
if (ch != '_' && !Character.isLetterOrDigit(ch)) {
break;
}
}
if (i == 0) {
break;
}
String alias = up.substring(0, i);
String alias = query.substring(0, i);
if (ParserUtil.isKeyword(alias, false)) {
break;
}
Expand All @@ -169,17 +169,17 @@ public boolean autoComplete(Sentence sentence) {
DbTableOrView last = sentence.getLastMatchedTable();
if (last != null && last.getColumns() != null) {
for (DbColumn column : last.getColumns()) {
String compare = up;
String name = StringUtils.toUpperEnglish(column.getName());
String compare = query;
String name = column.getName();
if (column.getQuotedName().length() > name.length()) {
name = column.getQuotedName();
compare = query;
}
if (compare.startsWith(name) && testColumnType(column)) {
if (StringUtils.startsWithIgnoringCase(compare, name) && testColumnType(column)) {
String b = s.substring(name.length());
if (best == null || b.length() < best.length()) {
best = b;
} else if (s.length() == 0 || name.startsWith(compare)) {
} else if (s.isEmpty() || StringUtils.startsWithIgnoringCase(name, compare)) {
if (s.length() < name.length()) {
sentence.add(column.getName(),
column.getName().substring(s.length()),
Expand All @@ -198,15 +198,14 @@ public boolean autoComplete(Sentence sentence) {
continue;
}
for (DbColumn column : table.getColumns()) {
String name = StringUtils.toUpperEnglish(column
.getName());
String name = column.getName();
if (testColumnType(column)) {
if (up.startsWith(name)) {
if (StringUtils.startsWithIgnoringCase(query, name)) {
String b = s.substring(name.length());
if (best == null || b.length() < best.length()) {
best = b;
}
} else if (s.length() == 0 || name.startsWith(up)) {
} else if (s.isEmpty() || StringUtils.startsWithIgnoringCase(name, query)) {
if (s.length() < name.length()) {
sentence.add(column.getName(),
column.getName().substring(s.length()),
Expand Down Expand Up @@ -329,7 +328,7 @@ private static String autoCompleteTableAlias(Sentence sentence,
return s;
}
s = s.substring(alias.length());
if (s.length() == 0) {
if (s.isEmpty()) {
sentence.add(alias + ".", ".", Sentence.CONTEXT);
}
return s;
Expand All @@ -344,15 +343,15 @@ private static String autoCompleteTableAlias(Sentence sentence,
(best == null || tableName.length() > best.length())) {
sentence.setLastMatchedTable(table);
best = tableName;
} else if (s.length() == 0 || tableName.startsWith(alias)) {
} else if (s.isEmpty() || tableName.startsWith(alias)) {
sentence.add(tableName + ".",
tableName.substring(s.length()) + ".",
Sentence.CONTEXT);
}
}
if (best != null) {
s = s.substring(best.length());
if (s.length() == 0) {
if (s.isEmpty()) {
sentence.add(alias + ".", ".", Sentence.CONTEXT);
}
return s;
Expand Down
37 changes: 37 additions & 0 deletions h2/src/main/org/h2/util/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
import java.lang.ref.SoftReference;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.function.IntPredicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.h2.api.ErrorCode;
import org.h2.engine.SysProperties;
Expand Down Expand Up @@ -1383,4 +1386,38 @@ public static String escapeMetaDataPattern(String pattern) {
return replaceAll(pattern, "\\", "\\\\");
}

/**
* Case-sensitive check if a {@param text} starts with a {@param prefix}.
* It only calls {@code String.startsWith()} and is only here for API consistency
*
* @param text the full text starting with a prefix
* @param prefix the full text starting with a prefix
* @return TRUE only if text starts with the prefix
*/
public static boolean startsWith(String text, String prefix) {
return text.startsWith(prefix);
}

/**
* Case-Insensitive check if a {@param text} starts with a {@param prefix}.
* It is used
*
* @param text the full text starting with a prefix
* @param prefix the full text starting with a prefix
* @return TRUE only if text starts with the prefix
*/
public static boolean startsWithIgnoringCase(String text, String prefix) {
String normalizedText = Normalizer.normalize(text, Normalizer.Form.NFD)
.replaceAll("\\p{M}", "");
String normalizedPrefix = Normalizer.normalize(prefix, Normalizer.Form.NFD)
.replaceAll("\\p{M}", "");

final Pattern pattern = Pattern.compile(

This comment has been minimized.

Copy link
@grandinj

grandinj Feb 1, 2024

Contributor

It would probably be cheaper to use

Collator collator = Collator.getInstance();
collator.setStrength(Collator.PRIMARY);
return collator.compare(text.substring(0, prefix.length), prefix) == Collator.EQUALS;

This comment has been minimized.

Copy link
@manticore-projects

manticore-projects Feb 1, 2024

Author Contributor

Thank you for your feed back.
Since its not a hot code path I wanted to write it as simple and robust as possible, but I don't mind replacing it with your code at all. Shall I?

This comment has been minimized.

Copy link
@grandinj

grandinj Feb 1, 2024

Contributor

Yeah, please lets rather use Collator, and let the JDK do the weird unicode work, rather then coding it ourselves.

"^" + normalizedPrefix
, Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.COMMENTS | Pattern.UNICODE_CASE);

final Matcher matcher = pattern.matcher(normalizedText);
return matcher.find();
}

}

3 comments on commit 136a77c

@manticore-projects
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really sorry to bother you, but this does not work. I have changed your code to make it working:

        Collator collator = Collator.getInstance();
        collator.setStrength(Collator.PRIMARY);
        return collator.compare(text.substring(0, prefix.length()), prefix)==0;

But this lets the tests fail.
I also tried all other constants Collator.SECONDARY, Collator.TERTIARY, NO_DECOMPOSITION, IDENTICAL, CANONICAL_DECOMPOSITION, FULL_DECOMPOSITION but the tests keep failing.

@manticore-projects
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the the java doc I understand that PRIMARY was our friend:

 ou can set a Collator's strength property to determine the level of difference considered significant in comparisons. Four strengths are provided: PRIMARY, SECONDARY, TERTIARY, and IDENTICAL. The exact assignment of strengths to language features is locale dependent. For example, in Czech, "e" and "f" are considered primary differences, while "e" and "ě" are secondary differences, "e" and "E" are tertiary differences and "e" and "e" are identical. The following shows how both case and accents could be ignored for US English.
  //Get the Collator for US English and set its strength to PRIMARY
  Collator usCollator = Collator.getInstance(Locale.US);
  usCollator.setStrength(Collator.PRIMARY);
  if( usCollator.compare("abc", "ABC") == 0 ) {
      System.out.println("Strings are equivalent");
  }

@manticore-projects
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those tests fail:

result = client.get(url, "autoCompleteList.do?query=select 'abc");
            assertContains(StringUtils.urlDecode(result), "'");
            result = client.get(url, "autoCompleteList.do?query=select 'abc''");
            assertContains(StringUtils.urlDecode(result), "'");

Please sign in to comment.