Skip to content

Commit

Permalink
Improve boolean parsing performance (opensearch-project#11308)
Browse files Browse the repository at this point in the history
Signed-off-by: Ketan Verma <ketan9495@gmail.com>
Signed-off-by: Ketan Verma <9292653+ketanv3@users.noreply.github.com>
  • Loading branch information
ketanv3 authored and fahadshamiinsta committed Dec 4, 2023
1 parent c015993 commit 4e8386a
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 168 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
- Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087))
- Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528))
- Improve boolean parsing performance ([#11308](https://github.com/opensearch-project/OpenSearch/pull/11308))

### Deprecated

Expand Down
183 changes: 70 additions & 113 deletions libs/common/src/main/java/org/opensearch/common/Booleans.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,30 +45,72 @@ private Booleans() {
/**
* Parses a char[] representation of a boolean value to <code>boolean</code>.
*
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of chars is "false" or the
* provided default value iff either text is <code>null</code> or length == 0.
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of
* chars is "false" or the provided default value iff either text is <code>null</code> or length == 0.
* @throws IllegalArgumentException if the string cannot be parsed to boolean.
*/
public static boolean parseBoolean(char[] text, int offset, int length, boolean defaultValue) {
if (text == null || length == 0) {
if (text == null) {
return defaultValue;
} else {
return parseBoolean(new String(text, offset, length));
}

switch (length) {
case 0:
return defaultValue;
case 1:
case 2:
case 3:
default:
break;
case 4:
if (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e') {
return true;
}
break;
case 5:
if (text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e') {
return false;
}
break;
}

throw new IllegalArgumentException(
"Failed to parse value [" + new String(text, offset, length) + "] as only [true] or [false] are allowed."
);
}

/**
* returns true iff the sequence of chars is one of "true","false".
* Returns true iff the sequence of chars is one of "true", "false".
*
* @param text sequence to check
* @param offset offset to start
* @param length length to check
*/
public static boolean isBoolean(char[] text, int offset, int length) {
if (text == null || length == 0) {
if (text == null) {
return false;
}
return isBoolean(new String(text, offset, length));

switch (length) {
case 0:
case 1:
case 2:
case 3:
default:
return false;
case 4:
return text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e';
case 5:
return text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e';
}
}

public static boolean isBoolean(String value) {
Expand All @@ -91,63 +133,45 @@ public static boolean parseBoolean(String value) {
throw new IllegalArgumentException("Failed to parse value [" + value + "] as only [true] or [false] are allowed.");
}

private static boolean hasText(CharSequence str) {
if (str == null || str.length() == 0) {
return false;
}
int strLen = str.length();
for (int i = 0; i < strLen; i++) {
if (!Character.isWhitespace(str.charAt(i))) {
return true;
}
}
return false;
}

/**
* Parses a string representation of a boolean value to <code>boolean</code>.
* Note the subtle difference between this and {@link #parseBoolean(char[], int, int, boolean)}; this returns the
* default value even when the value is non-zero length containing all whitespaces (possibly overlooked, but
* preserving this behavior for compatibility reasons). Use {@link #parseBooleanStrict(String, boolean)} instead.
*
* @param value text to parse.
* @param defaultValue The default value to return if the provided value is <code>null</code>.
* @param defaultValue The default value to return if the provided value is <code>null</code> or blank.
* @return see {@link #parseBoolean(String)}
*/
@Deprecated
public static boolean parseBoolean(String value, boolean defaultValue) {
if (hasText(value)) {
return parseBoolean(value);
}
return defaultValue;
}

public static Boolean parseBoolean(String value, Boolean defaultValue) {
if (hasText(value)) {
return parseBoolean(value);
if (value == null || value.isBlank()) {
return defaultValue;
}
return defaultValue;
return parseBoolean(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, Boolean)} instead.
*/
@Deprecated
public static Boolean parseBooleanLenient(String value, Boolean defaultValue) {
if (value == null) { // only for the null case we do that here!
public static Boolean parseBoolean(String value, Boolean defaultValue) {
if (value == null || value.isBlank()) {
return defaultValue;
}
return parseBooleanLenient(value, false);
return parseBoolean(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
* Parses a string representation of a boolean value to <code>boolean</code>.
* Analogous to {@link #parseBoolean(char[], int, int, boolean)}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, boolean)} instead.
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of
* chars is "false", or the provided default value iff either text is <code>null</code> or length == 0.
* @throws IllegalArgumentException if the string cannot be parsed to boolean.
*/
@Deprecated
public static boolean parseBooleanLenient(String value, boolean defaultValue) {
if (value == null) {
public static boolean parseBooleanStrict(String value, boolean defaultValue) {
if (value == null || value.length() == 0) {
return defaultValue;
}
return !(value.equals("false") || value.equals("0") || value.equals("off") || value.equals("no"));
return parseBoolean(value);
}

/**
Expand All @@ -163,71 +187,4 @@ public static boolean isFalse(String value) {
public static boolean isTrue(String value) {
return "true".equals(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(char[], int, int, boolean)} instead
*/
@Deprecated
public static boolean parseBooleanLenient(char[] text, int offset, int length, boolean defaultValue) {
if (text == null || length == 0) {
return defaultValue;
}
if (length == 1) {
return text[offset] != '0';
}
if (length == 2) {
return !(text[offset] == 'n' && text[offset + 1] == 'o');
}
if (length == 3) {
return !(text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f');
}
if (length == 5) {
return !(text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e');
}
return true;
}

/**
* returns true if the a sequence of chars is one of "true","false","on","off","yes","no","0","1"
*
* @param text sequence to check
* @param offset offset to start
* @param length length to check
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #isBoolean(char[], int, int)} instead.
*/
@Deprecated
public static boolean isBooleanLenient(char[] text, int offset, int length) {
if (text == null || length == 0) {
return false;
}
if (length == 1) {
return text[offset] == '0' || text[offset] == '1';
}
if (length == 2) {
return (text[offset] == 'n' && text[offset + 1] == 'o') || (text[offset] == 'o' && text[offset + 1] == 'n');
}
if (length == 3) {
return (text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f')
|| (text[offset] == 'y' && text[offset + 1] == 'e' && text[offset + 2] == 's');
}
if (length == 4) {
return (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e');
}
if (length == 5) {
return (text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e');
}
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@

import org.opensearch.test.OpenSearchTestCase;

import java.util.Locale;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class BooleansTests extends OpenSearchTestCase {
private static final String[] NON_BOOLEANS = new String[] {
Expand Down Expand Up @@ -81,8 +78,23 @@ public void testParseBooleanWithFallback() {
assertFalse(Booleans.parseBoolean(null, Boolean.FALSE));
assertTrue(Booleans.parseBoolean(null, Boolean.TRUE));

assertFalse(Booleans.parseBoolean("", false));
assertTrue(Booleans.parseBoolean("", true));
assertNull(Booleans.parseBoolean("", null));
assertFalse(Booleans.parseBoolean("", Boolean.FALSE));
assertTrue(Booleans.parseBoolean("", Boolean.TRUE));

assertFalse(Booleans.parseBoolean(" \t\n", false));
assertTrue(Booleans.parseBoolean(" \t\n", true));
assertNull(Booleans.parseBoolean(" \t\n", null));
assertFalse(Booleans.parseBoolean(" \t\n", Boolean.FALSE));
assertTrue(Booleans.parseBoolean(" \t\n", Boolean.TRUE));

assertTrue(Booleans.parseBoolean("true", randomFrom(Boolean.TRUE, Boolean.FALSE, null)));
assertFalse(Booleans.parseBoolean("false", randomFrom(Boolean.TRUE, Boolean.FALSE, null)));

assertTrue(Booleans.parseBoolean(new char[0], 0, 0, true));
assertFalse(Booleans.parseBoolean(new char[0], 0, 0, false));
}

public void testParseNonBooleanWithFallback() {
Expand All @@ -109,56 +121,12 @@ public void testParseNonBoolean() {
}
}

public void testIsBooleanLenient() {
String[] booleans = new String[] { "true", "false", "on", "off", "yes", "no", "0", "1" };
String[] notBooleans = new String[] { "11", "00", "sdfsdfsf", "F", "T" };
assertThat(Booleans.isBooleanLenient(null, 0, 1), is(false));

for (String b : booleans) {
String t = "prefix" + b + "suffix";
assertTrue(
"failed to recognize [" + b + "] as boolean",
Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), b.length())
);
}

for (String nb : notBooleans) {
String t = "prefix" + nb + "suffix";
assertFalse("recognized [" + nb + "] as boolean", Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), nb.length()));
}
}

public void testParseBooleanLenient() {
assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomBoolean()), is(true));
assertThat(Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomBoolean()), is(false));
assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT), randomBoolean()), is(true));
assertThat(Booleans.parseBooleanLenient(null, false), is(false));
assertThat(Booleans.parseBooleanLenient(null, true), is(true));

assertThat(
Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)),
is(true)
);
assertThat(
Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)),
is(false)
);
assertThat(
Booleans.parseBooleanLenient(
randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT),
randomFrom(Boolean.TRUE, Boolean.FALSE, null)
),
is(true)
);
assertThat(Booleans.parseBooleanLenient(null, Boolean.FALSE), is(false));
assertThat(Booleans.parseBooleanLenient(null, Boolean.TRUE), is(true));
assertThat(Booleans.parseBooleanLenient(null, null), nullValue());

char[] chars = randomFrom("true", "on", "yes", "1").toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true));
chars = randomFrom("false", "off", "no", "0").toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(false));
chars = randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT).toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true));
public void testParseBooleanStrict() {
assertTrue(Booleans.parseBooleanStrict("true", false));
assertFalse(Booleans.parseBooleanStrict("false", true));
assertTrue(Booleans.parseBooleanStrict(null, true));
assertFalse(Booleans.parseBooleanStrict("", false));
expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict("foobar", false));
expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict(" \t\n", false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ protected Boolean parseSourceValue(Object value) {
return (Boolean) value;
} else {
String textValue = value.toString();
return Booleans.parseBoolean(textValue.toCharArray(), 0, textValue.length(), false);
return Booleans.parseBooleanStrict(textValue, false);
}
}
};
Expand Down

0 comments on commit 4e8386a

Please sign in to comment.