Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAMEL-11420-Add contains ignore case operator to simple language #1773

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildingtools/src/main/resources/camel-checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ lengths, if/try depths, etc...
<module name="JUnitTestCase"/>
-->
<module name="ReturnCount">
<property name="max" value="20"/>
<property name="max" value="21"/>
<property name="maxForVoid" value="25"/>
</module>

Expand Down
2 changes: 2 additions & 0 deletions camel-core/src/main/docs/simple-language.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,8 @@ values)

|not contains |For testing if not contains in a string based value

|~~ |For testing if contains, ignoring case sensitivity, in a string based value

|regex |For matching against a given regular expression pattern defined as a
String value

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,27 @@ protected String getOperationText() {
}
};
}

public static Predicate containsIgnoreCase(final Expression left, final Expression right) {
return new BinaryPredicateSupport(left, right) {

protected boolean matches(Exchange exchange, Object leftValue, Object rightValue) {
if (leftValue == null && rightValue == null) {
// they are equal
return true;
} else if (leftValue == null || rightValue == null) {
// only one of them is null so they are not equal
return false;
}

return ObjectHelper.containsIgnoreCase(leftValue, rightValue);
}

protected String getOperationText() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this should be ~~ or contains ignore case etc

return "~~";
}
};
}

public static Predicate isNull(final Expression expression) {
return new BinaryPredicateSupport(expression, ExpressionBuilder.constantExpression(null)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public final class SimpleTokenizer {
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "~~"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ public Expression createExpression(String expression) {
return createExpression(leftExp, rightExp, PredicateBuilder.contains(leftExp, rightExp));
} else if (operator == BinaryOperatorType.NOT_CONTAINS) {
return createExpression(leftExp, rightExp, PredicateBuilder.not(PredicateBuilder.contains(leftExp, rightExp)));
} else if (operator == BinaryOperatorType.CONTAINS_IGNORECASE) {
return createExpression(leftExp, rightExp, PredicateBuilder.containsIgnoreCase(leftExp, rightExp));
} else if (operator == BinaryOperatorType.IS || operator == BinaryOperatorType.NOT_IS) {
return createIsExpression(expression, leftExp, rightExp);
} else if (operator == BinaryOperatorType.REGEX || operator == BinaryOperatorType.NOT_REGEX) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
*/
public enum BinaryOperatorType {

EQ, EQ_IGNORE, GT, GTE, LT, LTE, NOT_EQ, CONTAINS, NOT_CONTAINS, REGEX, NOT_REGEX,
EQ, EQ_IGNORE, GT, GTE, LT, LTE, NOT_EQ, CONTAINS, NOT_CONTAINS,
CONTAINS_IGNORECASE, REGEX, NOT_REGEX,
IN, NOT_IN, IS, NOT_IS, RANGE, NOT_RANGE, STARTS_WITH, ENDS_WITH;

public static BinaryOperatorType asOperator(String text) {
Expand All @@ -43,6 +44,8 @@ public static BinaryOperatorType asOperator(String text) {
return CONTAINS;
} else if ("not contains".equals(text)) {
return NOT_CONTAINS;
} else if ("~~".equals(text)) {
return CONTAINS_IGNORECASE;
} else if ("regex".equals(text)) {
return REGEX;
} else if ("not regex".equals(text)) {
Expand Down Expand Up @@ -86,6 +89,8 @@ public static String getOperatorText(BinaryOperatorType operator) {
return "contains";
} else if (operator == NOT_CONTAINS) {
return "not contains";
} else if (operator == CONTAINS_IGNORECASE) {
return "~~";
} else if (operator == REGEX) {
return "regex";
} else if (operator == NOT_REGEX) {
Expand Down Expand Up @@ -174,6 +179,8 @@ public static ParameterType[] supportedParameterTypes(BinaryOperatorType operato
return null;
} else if (operator == NOT_CONTAINS) {
return null;
} else if (operator == CONTAINS_IGNORECASE) {
return null;
} else if (operator == REGEX) {
return new ParameterType[]{ParameterType.Literal, ParameterType.Function};
} else if (operator == NOT_REGEX) {
Expand Down
37 changes: 37 additions & 0 deletions camel-core/src/main/java/org/apache/camel/util/ObjectHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,13 @@ public static int typeCoerceCompare(TypeConverter converter, Object leftValue, O
public static boolean equal(Object a, Object b) {
return equal(a, b, false);
}

/**
 * A helper method for comparing objects for equality while handling case insensitivity.
 * <p/>
 * Delegates to the three-argument {@code equal} overload with the flag set to
 * {@code true}; the plain {@code equal(a, b)} overload passes {@code false}.
 *
 * @param a the first object to compare
 * @param b the second object to compare
 * @return the result of the case-insensitive comparison
 */
public static boolean equalIgnoreCase(Object a, Object b) {
    final boolean ignoreCase = true;
    return equal(a, b, ignoreCase);
}

/**
* A helper method for comparing objects for equality while handling nulls
Expand Down Expand Up @@ -647,6 +654,36 @@ public static boolean contains(Object collectionOrArray, Object value) {
}
return false;
}

/**
 * Returns true if the collection contains the specified value by considering case insensitivity.
 * <p/>
 * {@link StringBuffer} and {@link StringBuilder} arguments are first converted to
 * {@link String}. When both arguments are strings, a case-insensitive substring test
 * is performed. Otherwise the elements of the collection (or whatever
 * {@code createIterator} yields for the given object) are compared one by one
 * using {@code equalIgnoreCase}.
 *
 * @param collectionOrArray the collection, array or string based value to search in
 * @param value             the value to look for
 * @return <tt>true</tt> if a case-insensitive match was found, <tt>false</tt> otherwise
 */
public static boolean containsIgnoreCase(Object collectionOrArray, Object value) {
    // favor String types (instanceof is already null-safe)
    if (collectionOrArray instanceof StringBuffer || collectionOrArray instanceof StringBuilder) {
        collectionOrArray = collectionOrArray.toString();
    }
    if (value instanceof StringBuffer || value instanceof StringBuilder) {
        value = value.toString();
    }

    if (collectionOrArray instanceof String && value instanceof String) {
        return StringHelper.containsIgnoreCase((String) collectionOrArray, (String) value);
    }

    if (collectionOrArray instanceof Collection) {
        // Collection.contains() relies on equals() and is therefore case sensitive,
        // so the elements must be compared one by one to honour the ignore-case contract
        for (Object element : (Collection<?>) collectionOrArray) {
            if (equalIgnoreCase(value, element)) {
                return true;
            }
        }
        return false;
    }

    Iterator<Object> iter = createIterator(collectionOrArray);
    while (iter.hasNext()) {
        if (equalIgnoreCase(value, iter.next())) {
            return true;
        }
    }
    return false;
}

/**
* Creates an iterable over the value if the value is a collection, an
Expand Down
35 changes: 35 additions & 0 deletions camel-core/src/main/java/org/apache/camel/util/StringHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -672,5 +672,40 @@ public static String trimToNull(final String given) {

return trimmed;
}

/**
* Checks if the src string contains what, ignoring case differences
*
* @param src is the source string to be checked
* @param what is the string which will be looked up in the src argument
* @return true/false
*/
public static boolean containsIgnoreCase(String src, String what) {
if (src == null || what == null) {
return false;
}

final int length = what.length();
if (length == 0) {
return true; // Empty string is contained
}

final char firstLo = Character.toLowerCase(what.charAt(0));
final char firstUp = Character.toUpperCase(what.charAt(0));

for (int i = src.length() - length; i >= 0; i--) {
// Quick check before calling the more expensive regionMatches() method:
final char ch = src.charAt(i);
Copy link
Contributor

@snurmine snurmine Jun 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to camel-jmh, using the four test Strings you provided, plain regionMatches might be the fastest; it probably needs more iterations to get the error smaller.
Mode Cnt Score Error Units
only src.regionMatches avgt 20 4937.268 ± 106.718 us/op
empty check+src.regionMatches avgt 20 4889.541 ± 38.962 us/op
with quick check avgt 20 7250.833 ± 49.024 us/op

We should probably test with cases where there are equal numbers of falses and trues. Results of course depend on the distribution of the data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work with src=aaa and str=aa?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, seems to work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without quick check

Run complete. Total time: 00:00:28

Benchmark Mode Cnt Score Error Units
ContainsIgnoreCaseTest.benchmark thrpt 2 ≈ 10⁻⁵ ops/us
ContainsIgnoreCaseTest.benchmark avgt 2 250265,305 us/op
ContainsIgnoreCaseTest.benchmark sample 20 226072,986 ± 11368,864 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.00 sample 207618,048 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.50 sample 223608,832 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.90 sample 247699,866 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.95 sample 248499,405 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.99 sample 248512,512 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.999 sample 248512,512 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.9999 sample 248512,512 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p1.00 sample 248512,512 us/op
ContainsIgnoreCaseTest.benchmark ss 2 525478,432 us/op

With quick check

Run complete. Total time: 00:00:27

Benchmark Mode Cnt Score Error Units
ContainsIgnoreCaseTest.benchmark thrpt 2 ≈ 10⁻⁵ ops/us
ContainsIgnoreCaseTest.benchmark avgt 2 250323,754 us/op
ContainsIgnoreCaseTest.benchmark sample 20 205455,360 ± 19499,901 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.00 sample 180879,360 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.50 sample 198443,008 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.90 sample 248879,514 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.95 sample 258827,878 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.99 sample 259260,416 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.999 sample 259260,416 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.9999 sample 259260,416 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p1.00 sample 259260,416 us/op
ContainsIgnoreCaseTest.benchmark ss 2 254005,954 us/op

There is a slight difference.
I don't think JMH results are accurate enough to measure such a single line. For now I'd prefer keeping it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably need more iterations since the error is so large.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With quick check
10 Warmup / 20 Measurement Iterations

Run complete. Total time: 00:02:54

Benchmark Mode Cnt Score Error Units
ContainsIgnoreCaseTest.benchmark thrpt 20 ≈ 10⁻⁵ ops/us
ContainsIgnoreCaseTest.benchmark avgt 20 256533,689 ± 31954,114 us/op
ContainsIgnoreCaseTest.benchmark sample 206 206790,895 ± 5120,475 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.00 sample 162791,424 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.50 sample 203685,888 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.90 sample 237004,390 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.95 sample 245760,000 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.99 sample 263661,814 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.999 sample 307757,056 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.9999 sample 307757,056 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p1.00 sample 307757,056 us/op
ContainsIgnoreCaseTest.benchmark ss 20 232980,729 ± 17232,308 us/op

Without quick check

Run complete. Total time: 00:02:56

Benchmark Mode Cnt Score Error Units
ContainsIgnoreCaseTest.benchmark thrpt 20 ≈ 10⁻⁵ ops/us
ContainsIgnoreCaseTest.benchmark avgt 20 256596,411 ± 17866,599 us/op
ContainsIgnoreCaseTest.benchmark sample 196 228584,218 ± 5345,551 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.00 sample 190316,544 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.50 sample 224133,120 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.90 sample 258736,128 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.95 sample 273704,550 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.99 sample 298922,803 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.999 sample 301465,600 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p0.9999 sample 301465,600 us/op
ContainsIgnoreCaseTest.benchmark:benchmark·p1.00 sample 301465,600 us/op
ContainsIgnoreCaseTest.benchmark ss 20 257154,783 ± 18134,146 us/op

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still no significant change. I guess it's not easy to measure such a simple check.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a bug in my tests, that is why results were better. It seems that this optimization is beneficial.

if (ch != firstLo && ch != firstUp) {
continue;
}

if (src.regionMatches(true, i, what, 0, length)) {
return true;
}
}

return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,13 @@ public void testNotContains() throws Exception {
assertPredicate("${in.header.foo} not contains 'abc'", false);
assertPredicate("${in.header.foo} not contains 'def'", true);
}

// Exercises the "~~" (contains ignore case) operator of the simple language.
// The "not contains" assertions just above show that the foo header contains 'abc' but not 'def'.
public void testContainsIgnoreCase() throws Exception {
// needles that differ from the header content only by case must match
assertPredicate("${in.header.foo} ~~ 'A'", true);
assertPredicate("${in.header.foo} ~~ 'Ab'", true);
assertPredicate("${in.header.foo} ~~ 'Abc'", true);
// a needle that is not part of the header must not match, whatever its case
assertPredicate("${in.header.foo} ~~ 'defG'", false);
}

public void testRegex() throws Exception {
assertPredicate("${in.header.foo} regex '^a..$'", true);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.camel.itest.jmh;

import java.util.concurrent.TimeUnit;

import org.apache.camel.util.StringHelper;
import org.junit.Test;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

/**
 * JMH micro-benchmark for {@link StringHelper#containsIgnoreCase(String, String)},
 * launched through a JUnit test method.
 * <p/>
 * Thanks to this SO answer: https://stackoverflow.com/questions/30485856/how-to-run-jmh-from-inside-junit-tests
 */
public class ContainsIgnoreCaseTest {

    /**
     * Builds the JMH options and runs every benchmark declared in this class.
     *
     * @throws Exception if the JMH runner fails
     */
    @Test
    public void launchBenchmark() throws Exception {
        Options opt = new OptionsBuilder()
            // Specify which benchmarks to run.
            // You can be more specific if you'd like to run only one benchmark per test.
            .include(this.getClass().getName() + ".*")
            // Set the following options as needed
            .mode(Mode.All)
            .timeUnit(TimeUnit.MICROSECONDS)
            .warmupTime(TimeValue.seconds(1))
            .warmupIterations(2)
            .measurementTime(TimeValue.seconds(1))
            .measurementIterations(2)
            .threads(2)
            .forks(1)
            .shouldFailOnError(true)
            .shouldDoGC(true)
            .build();

        new Runner(opt).run();
    }

    // The JMH samples are the best documentation for how to use it
    // http://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/
    /**
     * Holds the benchmark inputs. They are kept in non-final fields of a state
     * object, rather than as literals in the benchmark method, so that the JIT
     * cannot treat them as constants and fold the measured calls away.
     */
    @State(Scope.Thread)
    public static class BenchmarkState {
        String source;
        String[] needles;

        @Setup(Level.Trial)
        public void initialize() {
            source = "abc";
            // three needles that match (ignoring case) followed by three that do not
            needles = new String[] {"A", "aB", "aBc", "ad", "abD", "ABD"};
        }
    }

    /**
     * Measures {@link StringHelper#containsIgnoreCase(String, String)} for every
     * needle held by the state; results go to the blackhole so the calls are not
     * eliminated as dead code.
     */
    @Benchmark
    @Measurement(batchSize = 1000000)
    public void benchmark(BenchmarkState state, Blackhole bh) {
        for (String needle : state.needles) {
            bh.consume(StringHelper.containsIgnoreCase(state.source, needle));
        }
    }

}